From 8c949cec714751fbfd8c791b90f80f4e26b6d330 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad <vahid631983@gmail.com>
Date: Wed, 3 Sep 2025 19:18:11 +0200
Subject: [PATCH] PR #173 has been reverted. Additionally, for TrOCR, the
 cropped text lines are no longer accumulated in a list before prediction.
 Instead, the text line images are collected one batch at a time and
 predictions are made directly on each batch.

---
 src/eynollah/eynollah.py             | 452 ++++++++++++++++++++-------
 src/eynollah/utils/__init__.py       |  63 ++--
 src/eynollah/utils/contour.py        | 177 +++--------
 src/eynollah/utils/marginals.py      |   2 -
 src/eynollah/utils/separate_lines.py |  60 ++--
 src/eynollah/writer.py               |   4 +-
 6 files changed, 442 insertions(+), 316 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 396b4b9..eff9fa8 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -73,8 +73,6 @@ from .utils.contour import (
     return_contours_of_interested_region_by_min_size,
     return_contours_of_interested_textline,
     return_parent_contours,
-    dilate_textregion_contours,
-    dilate_textline_contours,
 )
 from .utils.rotate import (
     rotate_image,
@@ -112,8 +110,6 @@ from .utils.resize import resize_image
 from .utils import (
     boosting_headers_by_longshot_region_segmentation,
     crop_image_inside_box,
-    box2rect,
-    box2slice,
     find_num_col,
     otsu_copy_binary,
     put_drop_out_from_only_drop_model,
@@ -1750,7 +1746,7 @@ class Eynollah:
         self.logger.debug("exit extract_text_regions")
         return prediction_regions, prediction_regions2
 
-    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
+    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
 
         polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001)
         M_main_tot = [cv2.moments(polygons_of_textlines[j])
@@ -1773,17 +1769,18 @@ class Eynollah:
             all_found_textline_polygons.append(textlines_ins[::-1])
             slopes.append(slope_deskew)
 
-            crop_coor = box2rect(boxes[index])
+            _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated)
             all_box_coord.append(crop_coor)
 
         return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes
 
-    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
+    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_light")
         results = self.executor.map(partial(do_work_of_slopes_new_light,
                                             textline_mask_tot_ea=textline_mask_tot,
+                                            image_page_rotated=image_page_rotated,
                                             slope_deskew=slope_deskew,textline_light=self.textline_light,
                                             logger=self.logger,),
                                     boxes, contours, contours_par, range(len(contours_par)))
@@ -1791,12 +1788,13 @@ class Eynollah:
         self.logger.debug("exit get_slopes_and_deskew_new_light")
         return tuple(zip(*results))
 
-    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
+    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new")
         results = self.executor.map(partial(do_work_of_slopes_new,
                                             textline_mask_tot_ea=textline_mask_tot,
+                                            image_page_rotated=image_page_rotated,
                                             slope_deskew=slope_deskew,
                                             MAX_SLOPE=MAX_SLOPE,
                                             KERNEL=KERNEL,
@@ -1807,12 +1805,13 @@ class Eynollah:
         self.logger.debug("exit get_slopes_and_deskew_new")
         return tuple(zip(*results))
 
-    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
+    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_curved")
         results = self.executor.map(partial(do_work_of_slopes_new_curved,
                                             textline_mask_tot_ea=textline_mask_tot,
+                                            image_page_rotated=image_page_rotated,
                                             mask_texts_only=mask_texts_only,
                                             num_col=num_col,
                                             scale_par=scale_par,
@@ -1994,9 +1993,9 @@ class Eynollah:
         mask_texts_only = (prediction_regions_org[:,:] ==1)*1
         mask_images_only=(prediction_regions_org[:,:] ==2)*1
 
-        polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-        polygons_seplines = filter_contours_area_of_image(
-            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+        polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
+        polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(
+            mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
 
         polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
         polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
@@ -2035,7 +2034,7 @@ class Eynollah:
                 ##polygons_of_images_fin.append(ploy_img_ind)
 
                 box = cv2.boundingRect(ploy_img_ind)
-                page_coord_img = box2rect(box)
+                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
                 # cont_page.append(np.array([[page_coord[2], page_coord[0]],
                 #                            [page_coord[3], page_coord[0]],
                 #                            [page_coord[3], page_coord[1]],
@@ -2049,7 +2048,7 @@ class Eynollah:
             if h < 150 or w < 150:
                 pass
             else:
-                page_coord_img = box2rect(box)
+                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
                 # cont_page.append(np.array([[page_coord[2], page_coord[0]],
                 #                            [page_coord[3], page_coord[0]],
                 #                            [page_coord[3], page_coord[1]],
@@ -2060,7 +2059,7 @@ class Eynollah:
                                                         [page_coord_img[2], page_coord_img[1]]]))
 
         self.logger.debug("exit get_regions_extract_images_only")
-        return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_images_fin, image_page, page_coord, cont_page
+        return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
 
     def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
         self.logger.debug("enter get_regions_light_v")
@@ -2176,31 +2175,31 @@ class Eynollah:
             mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
             mask_images_only=(prediction_regions_org[:,:] ==2)*1
 
-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
             test_khat = np.zeros(prediction_regions_org.shape)
-            test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1))
+            test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1))
 
             #plt.imshow(test_khat[:,:])
             #plt.show()
             #for jv in range(1):
-                #print(jv, hir_seplines[0][232][3])
+                #print(jv, hir_lines_xml[0][232][3])
                 #test_khat = np.zeros(prediction_regions_org.shape)
-                #test_khat = cv2.fillPoly(test_khat, pts = [polygons_seplines[232]], color=(1,1,1))
+                #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1))
                 #plt.imshow(test_khat[:,:])
                 #plt.show()
 
-            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+            polygons_lines_xml = filter_contours_area_of_image(
+                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
 
             test_khat = np.zeros(prediction_regions_org.shape)
-            test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1))
+            test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1))
 
             #plt.imshow(test_khat[:,:])
             #plt.show()
             #sys.exit()
 
             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
-            ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts)
+            ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
 
             text_regions_p_true = np.zeros(prediction_regions_org.shape)
@@ -2218,7 +2217,7 @@ class Eynollah:
             #plt.show()
             #print("inside 4 ", time.time()-t_in)
             self.logger.debug("exit get_regions_light_v")
-            return text_regions_p_true, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin, confidence_matrix
+            return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix
         else:
             img_bin = resize_image(img_bin,img_height_h, img_width_h )
             self.logger.debug("exit get_regions_light_v")
@@ -2301,9 +2300,9 @@ class Eynollah:
             mask_texts_only=(prediction_regions_org[:,:]==1)*1
             mask_images_only=(prediction_regions_org[:,:]==2)*1
 
-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
+            polygons_lines_xml = filter_contours_area_of_image(
+                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
 
             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)
@@ -2315,7 +2314,7 @@ class Eynollah:
             text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
 
             self.logger.debug("exit get_regions_from_xy_2models")
-            return text_regions_p_true, erosion_hurts, polygons_seplines
+            return text_regions_p_true, erosion_hurts, polygons_lines_xml
         except:
             if self.input_binary:
                 prediction_bin = np.copy(img_org)
@@ -2350,9 +2349,9 @@ class Eynollah:
             mask_texts_only = (prediction_regions_org == 1)*1
             mask_images_only= (prediction_regions_org == 2)*1
 
-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
+            polygons_lines_xml = filter_contours_area_of_image(
+                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
 
             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
@@ -2365,7 +2364,7 @@ class Eynollah:
 
             erosion_hurts = True
             self.logger.debug("exit get_regions_from_xy_2models")
-            return text_regions_p_true, erosion_hurts, polygons_seplines
+            return text_regions_p_true, erosion_hurts, polygons_lines_xml
 
     def do_order_of_regions_full_layout(
             self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
@@ -3233,13 +3232,13 @@ class Eynollah:
         if self.plotter:
             self.plotter.save_deskewed_image(slope_deskew)
         self.logger.info("slope_deskew: %.2f°", slope_deskew)
-        return slope_deskew
+        return slope_deskew, slope_first
 
     def run_marginals(
-            self, textline_mask_tot_ea, mask_images, mask_lines,
+            self, image_page, textline_mask_tot_ea, mask_images, mask_lines,
             num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):
 
-        textline_mask_tot = textline_mask_tot_ea[:, :]
+        image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :]
         textline_mask_tot[mask_images[:, :] == 1] = 0
 
         text_regions_p_1[mask_lines[:, :] == 1] = 3
@@ -3257,7 +3256,10 @@ class Eynollah:
             except Exception as e:
                 self.logger.error("exception %s", e)
 
-        return textline_mask_tot, text_regions_p
+        if self.plotter:
+            self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
+            self.plotter.save_plot_of_layout_main(text_regions_p, image_page)
+        return textline_mask_tot, text_regions_p, image_page_rotated
 
     def run_boxes_no_full_layout(
             self, image_page, textline_mask_tot, text_regions_p,
@@ -3409,7 +3411,7 @@ class Eynollah:
                 text_regions_p[:,:][table_prediction[:,:]==1] = 10
                 img_revised_tab = text_regions_p[:,:]
                 if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
+                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
                         rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
 
                     text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
@@ -3429,7 +3431,7 @@ class Eynollah:
 
             else:
                 if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
+                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
                         rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
 
                     text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
@@ -4280,9 +4282,7 @@ class Eynollah:
         
 
     def filter_contours_without_textline_inside(
-            self, contours, text_con_org, contours_textline,
-            contours_only_text_parent_d_ordered,
-            conf_contours_textregions):
+            self, contours,text_con_org,  contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions):
         ###contours_txtline_of_all_textregions = []
         ###for jj in range(len(contours_textline)):
             ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj]
@@ -4306,21 +4306,138 @@ class Eynollah:
             ###if np.any(results==1):
                 ###contours_with_textline.append(con_tr)
 
-        textregion_index_to_del = set()
+        textregion_index_to_del = []
         for index_textregion, textlines_textregion in enumerate(contours_textline):
-            if len(textlines_textregion) == 0:
-                textregion_index_to_del.add(index_textregion)
-        def filterfun(lis):
-            if len(lis) == 0:
-                return []
-            return list(np.delete(lis, list(textregion_index_to_del)))
+            if len(textlines_textregion)==0:
+                textregion_index_to_del.append(index_textregion)
 
-        return (filterfun(contours),
-                filterfun(text_con_org),
-                filterfun(conf_contours_textregions),
-                filterfun(contours_textline),
-                filterfun(contours_only_text_parent_d_ordered),
-                np.arange(len(contours) - len(textregion_index_to_del)))
+        uniqe_args_trs = np.unique(textregion_index_to_del)
+        uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1]
+
+        for ind_u_a_trs in uniqe_args_trs_sorted:
+            conf_contours_textregions.pop(ind_u_a_trs)
+            contours.pop(ind_u_a_trs)
+            contours_textline.pop(ind_u_a_trs)
+            text_con_org.pop(ind_u_a_trs)
+            if len(contours_only_text_parent_d_ordered) > 0:
+                contours_only_text_parent_d_ordered.pop(ind_u_a_trs)
+
+        return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours)))
+
+    def dilate_textlines(self, all_found_textline_polygons):  # Shift the points of every text-line contour by fixed offsets (apparently to enlarge it); mutates the input in place and returns it.
+        for j in range(len(all_found_textline_polygons)):  # outer list: one entry per group of contours
+            for i in range(len(all_found_textline_polygons[j])):  # inner list: the contours of group j
+                con_ind = all_found_textline_polygons[j][i]
+                con_ind = con_ind.astype(float)  # float copy; indexed below as [point, 0, 0] for x and [point, 0, 1] for y
+
+                x_differential = np.diff( con_ind[:,0,0])  # x step between consecutive points (one entry fewer than points)
+                y_differential = np.diff( con_ind[:,0,1])  # y step between consecutive points
+
+                x_min = float(np.min( con_ind[:,0,0] ))  # bounding extents of the contour
+                y_min = float(np.min( con_ind[:,0,1] ))
+
+                x_max = float(np.max( con_ind[:,0,0] ))
+                y_max = float(np.max( con_ind[:,0,1] ))
+
+                if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70:  # taller than wide and narrower than 70: handled as a vertical line
+                    x_biger_than_x = np.abs(x_differential) > np.abs(y_differential)  # mask of steps that move mostly along x
+                    mult = x_biger_than_x*x_differential  # x step where x dominates, 0 elsewhere
+
+                    arg_min_mult = np.argmin(mult)  # index of the most negative x-dominant step
+                    arg_max_mult = np.argmax(mult)  # index of the most positive x-dominant step
+
+                    if y_differential[0]==0:  # zero steps at either end get a 0.1 placeholder, resolved below
+                        y_differential[0] = 0.1
+                    if y_differential[-1]==0:
+                        y_differential[-1]= 0.1
+                    y_differential = [y_differential[ind] if y_differential[ind] != 0  # interior zero steps become the mean of their two neighbours
+                                      else 0.5 * (y_differential[ind-1] + y_differential[ind+1])
+                                      for ind in range(len(y_differential))]
+
+                    if y_differential[0]==0.1:  # placeholder still present: copy the adjacent step instead
+                        y_differential[0] = y_differential[1]  # NOTE(review): needs at least two steps, otherwise IndexError
+                    if y_differential[-1]==0.1:
+                        y_differential[-1] = y_differential[-2]
+                    y_differential.append(y_differential[0])  # pad to one value per contour point
+
+                    y_differential = [-1 if y_differential[ind] < 0 else 1  # keep only the sign; a remaining zero counts as +1
+                                      for ind in range(len(y_differential))]
+                    y_differential = self.return_it_in_two_groups(y_differential)  # helper not visible here; presumably regroups the signs into two runs — TODO confirm
+                    y_differential = np.array(y_differential)
+
+                    con_scaled = con_ind*1  # working copy of the float contour
+                    con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential  # shift every x by 8, direction opposite to the y-step sign
+                    con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8  # both end points of the arg_min step move +8 in y
+                    con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8
+
+                    try:  # the points next to that step get a smaller +5 shift; index -1 wraps to the last point, +2 can run past the end
+                        con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5
+                        con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5
+                    except:  # NOTE(review): bare except; an out-of-range index is the expected failure, but any other error is hidden too
+                        pass
+
+                    con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8  # both end points of the arg_max step move -8 in y
+                    con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8
+
+                    try:  # neighbours of the arg_max step get a smaller -5 shift
+                        con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5
+                        con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5
+                    except:  # NOTE(review): bare except, see above
+                        pass
+
+                else:  # every other contour: same procedure with the roles of x and y swapped
+                    y_biger_than_x = np.abs(y_differential) > np.abs(x_differential)  # mask of steps that move mostly along y
+                    mult = y_biger_than_x*y_differential  # y step where y dominates, 0 elsewhere
+
+                    arg_min_mult = np.argmin(mult)  # index of the most negative y-dominant step
+                    arg_max_mult = np.argmax(mult)  # index of the most positive y-dominant step
+
+                    if x_differential[0]==0:  # zero steps at either end get a 0.1 placeholder, resolved below
+                        x_differential[0] = 0.1
+                    if x_differential[-1]==0:
+                        x_differential[-1]= 0.1
+                    x_differential = [x_differential[ind] if x_differential[ind] != 0  # interior zero steps become the mean of their two neighbours
+                                      else 0.5 * (x_differential[ind-1] + x_differential[ind+1])
+                                      for ind in range(len(x_differential))]
+
+                    if x_differential[0]==0.1:  # placeholder still present: copy the adjacent step instead
+                        x_differential[0] = x_differential[1]  # NOTE(review): needs at least two steps, otherwise IndexError
+                    if x_differential[-1]==0.1:
+                        x_differential[-1] = x_differential[-2]
+                    x_differential.append(x_differential[0])  # pad to one value per contour point
+
+                    x_differential = [-1 if x_differential[ind] < 0 else 1  # keep only the sign; a remaining zero counts as +1
+                                      for ind in range(len(x_differential))]
+                    x_differential = self.return_it_in_two_groups(x_differential)  # helper not visible here; presumably regroups the signs into two runs — TODO confirm
+                    x_differential = np.array(x_differential)
+
+                    con_scaled = con_ind*1  # working copy of the float contour
+                    con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential  # shift every y by 8, direction following the x-step sign
+                    con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8  # both end points of the arg_min step move +8 in x
+                    con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8
+
+                    try:  # the points next to that step get a smaller +5 shift; index -1 wraps to the last point, +2 can run past the end
+                        con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5
+                        con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5
+                    except:  # NOTE(review): bare except; an out-of-range index is the expected failure, but any other error is hidden too
+                        pass
+
+                    con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8  # both end points of the arg_max step move -8 in x
+                    con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8
+
+                    try:  # neighbours of the arg_max step get a smaller -5 shift
+                        con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5
+                        con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5
+                    except:  # NOTE(review): bare except, see above
+                        pass
+
+                con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0  # clamp negative y coordinates to 0
+                con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0  # clamp negative x coordinates to 0
+
+                all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1]  # write the shifted coordinates back into the caller's array
+                all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0]
+
+        return all_found_textline_polygons  # the same nested list that was passed in, now modified
 
     def delete_regions_without_textlines(
             self, slopes, all_found_textline_polygons, boxes_text, txt_con_org,
@@ -4431,7 +4548,7 @@ class Eynollah:
         self.logger.info("Enhancing took %.1fs ", time.time() - t0)
         
         if self.extract_only_images:
-            text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, image_page, page_coord, cont_page = \
+            text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \
                 self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
             pcgts = self.writer.build_pagexml_no_full_layout(
                 [], page_coord, [], [], [], [],
@@ -4459,7 +4576,8 @@ class Eynollah:
 
             all_found_textline_polygons=[ all_found_textline_polygons ]
 
-            all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
+            all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
+                all_found_textline_polygons)
             all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                 all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline")
             
@@ -4477,7 +4595,7 @@ class Eynollah:
             all_found_textline_polygons_marginals_right = []
             all_box_coord_marginals_left = []
             all_box_coord_marginals_right = []
-            polygons_seplines = []
+            polygons_lines_xml = []
             contours_tables = []
             conf_contours_textregions =[0]
             
@@ -4491,13 +4609,13 @@ class Eynollah:
                 cont_page, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, 
-                cont_page, polygons_seplines, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
+                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
             return pcgts
 
         #print("text region early -1 in %.1fs", time.time() - t0)
         t1 = time.time()
         if self.light_version:
-            text_regions_p_1, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
+            text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
             #print("text region early -2 in %.1fs", time.time() - t0)
 
@@ -4510,9 +4628,9 @@ class Eynollah:
 
                 textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
 
-                slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew)
+                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew)
             else:
-                slope_deskew = self.run_deskew(textline_mask_tot_ea)
+                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
             #print("text region early -2,5 in %.1fs", time.time() - t0)
             #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
             num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
@@ -4524,7 +4642,7 @@ class Eynollah:
             textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
             #print("text region early -4 in %.1fs", time.time() - t0)
         else:
-            text_regions_p_1, erosion_hurts, polygons_seplines = \
+            text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \
                 self.get_regions_from_xy_2models(img_res, is_image_enhanced,
                                                  num_col_classifier)
             self.logger.info("Textregion detection took %.1fs ", time.time() - t1)
@@ -4551,7 +4669,7 @@ class Eynollah:
             textline_mask_tot_ea = self.run_textline(image_page)
             self.logger.info("textline detection took %.1fs", time.time() - t1)
             t1 = time.time()
-            slope_deskew = self.run_deskew(textline_mask_tot_ea)
+            slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
             self.logger.info("deskewing took %.1fs", time.time() - t1)
         elif num_col_classifier in (1,2):
             org_h_l_m = textline_mask_tot_ea.shape[0]
@@ -4569,12 +4687,9 @@ class Eynollah:
             text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new )
             table_prediction = resize_image(table_prediction,img_h_new, img_w_new )
 
-        textline_mask_tot, text_regions_p = \
-            self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines,
+        textline_mask_tot, text_regions_p, image_page_rotated = \
+            self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines,
                                num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
-        if self.plotter:
-            self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
-            self.plotter.save_plot_of_layout_main(text_regions_p, image_page)
 
         if self.light_version and num_col_classifier in (1,2):
             image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
@@ -4583,6 +4698,7 @@ class Eynollah:
             textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m )
             text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m )
             table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
+            image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m )
 
         self.logger.info("detection of marginals took %.1fs", time.time() - t1)
         #print("text region early 2 marginal in %.1fs", time.time() - t0)
@@ -4593,14 +4709,14 @@ class Eynollah:
                 boxes, boxes_d, polygons_of_marginals, contours_tables = \
                 self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                               num_col_classifier, table_prediction, erosion_hurts)
-            ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
+            ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
         else:
             polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
                 regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
                 self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                            num_col_classifier, img_only_regions, table_prediction, erosion_hurts,
                                            img_bin_light if self.light_version else None)
-            ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
+            ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
             if self.light_version:
                 drop_label_in_full_layout = 4
                 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0
@@ -4724,30 +4840,31 @@ class Eynollah:
                     [], [], page_coord, [], [], [], [], [], [],
                     polygons_of_images, contours_tables, [],
                     polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [],
-                    cont_page, polygons_seplines)
+                    cont_page, polygons_lines_xml)
             else:
                 pcgts = self.writer.build_pagexml_no_full_layout(
                     [], page_coord, [], [], [], [],
                     polygons_of_images,
                     polygons_of_marginals, polygons_of_marginals,  empty_marginals, empty_marginals, empty_marginals, empty_marginals,  [], [], [], 
-                    cont_page, polygons_seplines, contours_tables)
+                    cont_page, polygons_lines_xml, contours_tables)
             return pcgts
 
 
 
         #print("text region early 3 in %.1fs", time.time() - t0)
         if self.light_version:
-            contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
+            contours_only_text_parent = self.dilate_textregions_contours(
+                contours_only_text_parent)
             contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one(
                 contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals)
             #print("text region early 3.5 in %.1fs", time.time() - t0)
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
-                contours_only_text_parent, self.image, confidence_matrix,  map=self.executor.map)
-            #txt_con_org = dilate_textregion_contours(txt_con_org)
-            #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
+                contours_only_text_parent, self.image, slope_first, confidence_matrix,  map=self.executor.map)
+            #txt_con_org = self.dilate_textregions_contours(txt_con_org)
+            #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
         else:
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
-                contours_only_text_parent, self.image, confidence_matrix,  map=self.executor.map)
+                contours_only_text_parent, self.image, slope_first, confidence_matrix,  map=self.executor.map)
         #print("text region early 4 in %.1fs", time.time() - t0)
         boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
         boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
@@ -4759,11 +4876,11 @@ class Eynollah:
                     all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                         all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2(
                             txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org,
-                            boxes_text, slope_deskew)
+                            image_page_rotated, boxes_text, slope_deskew)
                     all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                         all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2(
                             polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org,
-                            boxes_marginals, slope_deskew)
+                            image_page_rotated, boxes_marginals, slope_deskew)
 
                     #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
                     #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons,
@@ -4771,10 +4888,14 @@ class Eynollah:
                     #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \
                     #    self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals,
                     #        boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
-                    all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
+                    #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
+                    #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
+                    all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
+                        all_found_textline_polygons)
                     all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                         all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline")
-                    all_found_textline_polygons_marginals = dilate_textline_contours(all_found_textline_polygons_marginals)
+                    all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(
+                        all_found_textline_polygons_marginals)
                     contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \
                         index_by_text_par_con = self.filter_contours_without_textline_inside(
                             contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions)
@@ -4783,11 +4904,11 @@ class Eynollah:
                     all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \
                         index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light(
                             txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
-                            boxes_text, slope_deskew)
+                            image_page_rotated, boxes_text, slope_deskew)
                     all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                         all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light(
                             polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
-                            boxes_marginals, slope_deskew)
+                            image_page_rotated, boxes_marginals, slope_deskew)
                     #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                     #    all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
             else:
@@ -4795,25 +4916,25 @@ class Eynollah:
                 all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                     all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new(
                         txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
-                        boxes_text, slope_deskew)
+                        image_page_rotated, boxes_text, slope_deskew)
                 all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                     all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new(
                         polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
-                        boxes_marginals, slope_deskew)
+                        image_page_rotated, boxes_marginals, slope_deskew)
         else:
             scale_param = 1
             textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
             all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                 all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(
                     txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode,
-                    boxes_text, text_only,
+                    image_page_rotated, boxes_text, text_only,
                     num_col_classifier, scale_param, slope_deskew)
             all_found_textline_polygons = small_textlines_to_parent_adherence2(
                 all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
             all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                 all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(
                     polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode,
-                    boxes_marginals, text_only,
+                    image_page_rotated, boxes_marginals, text_only,
                     num_col_classifier, scale_param, slope_deskew)
             all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(
                 all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
@@ -4950,7 +5071,7 @@ class Eynollah:
                 all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                 polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
-                cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop,  conf_contours_textregions, conf_contours_textregions_h)
+                cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop,  conf_contours_textregions, conf_contours_textregions_h)
             return pcgts
 
         contours_only_text_parent_h = None
@@ -5042,7 +5163,7 @@ class Eynollah:
             txt_con_org, page_coord, order_text_new, id_of_texts_tot,
             all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
             all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, 
-            cont_page, polygons_seplines, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
+            cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
         return pcgts
 
 
@@ -5178,8 +5299,12 @@ class Eynollah_ocr:
                 cropped_lines = []
                 cropped_lines_region_indexer = []
                 cropped_lines_meging_indexing = []
+                
+                extracted_texts = []
 
                 indexer_text_region = 0
+                indexer_b_s = 0
+                
                 for nn in root1.iter(region_tags):
                     for child_textregion in nn:
                         if child_textregion.tag.endswith("TextLine"):
@@ -5204,40 +5329,105 @@ class Eynollah_ocr:
                                     img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                                     img_crop[mask_poly==0] = 255
                                     
+                                    
                                     if h2w_ratio > 0.1:
                                         cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)  )
                                         cropped_lines_meging_indexing.append(0)
+                                        indexer_b_s+=1
+                                        if indexer_b_s==self.b_s:
+                                            imgs = cropped_lines[:]
+                                            cropped_lines = []
+                                            indexer_b_s = 0
+                                            
+                                            pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                                            generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                                            generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                            
+                                            extracted_texts = extracted_texts + generated_text_merged
+                                            
                                     else:
                                         splited_images, _ = return_textlines_split_if_needed(img_crop, None)
                                         #print(splited_images)
                                         if splited_images:
                                             cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
                                             cropped_lines_meging_indexing.append(1)
+                                            indexer_b_s+=1
+                                            
+                                            if indexer_b_s==self.b_s:
+                                                imgs = cropped_lines[:]
+                                                cropped_lines = []
+                                                indexer_b_s = 0
+                                                
+                                                pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                                                generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                                                generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                                
+                                                extracted_texts = extracted_texts + generated_text_merged
+                                            
+                                            
                                             cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
                                             cropped_lines_meging_indexing.append(-1)
+                                            indexer_b_s+=1
+                                            
+                                            if indexer_b_s==self.b_s:
+                                                imgs = cropped_lines[:]
+                                                cropped_lines = []
+                                                indexer_b_s = 0
+                                                
+                                                pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                                                generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                                                generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                                
+                                                extracted_texts = extracted_texts + generated_text_merged
+                                                
                                         else:
                                             cropped_lines.append(img_crop)
                                             cropped_lines_meging_indexing.append(0)
+                                            indexer_b_s+=1
+                                            
+                                            if indexer_b_s==self.b_s:
+                                                imgs = cropped_lines[:]
+                                                cropped_lines = []
+                                                indexer_b_s = 0
+                                                
+                                                pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                                                generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                                                generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                                
+                                                extracted_texts = extracted_texts + generated_text_merged
+                                                
+                    
+                                            
                     indexer_text_region = indexer_text_region +1
         
-        
-                extracted_texts = []
-                n_iterations  = math.ceil(len(cropped_lines) / self.b_s) 
-
-                for i in range(n_iterations):
-                    if i==(n_iterations-1):
-                        n_start = i*self.b_s
-                        imgs = cropped_lines[n_start:]
-                    else:
-                        n_start = i*self.b_s
-                        n_end = (i+1)*self.b_s
-                        imgs = cropped_lines[n_start:n_end]
+                if indexer_b_s!=0:
+                    imgs = cropped_lines[:]
+                    cropped_lines = []
+                    indexer_b_s = 0
+                    
                     pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
                     generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
                     generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
                     
                     extracted_texts = extracted_texts + generated_text_merged
                     
+                ####extracted_texts = []
+                ####n_iterations  = math.ceil(len(cropped_lines) / self.b_s) 
+
+                ####for i in range(n_iterations):
+                    ####if i==(n_iterations-1):
+                        ####n_start = i*self.b_s
+                        ####imgs = cropped_lines[n_start:]
+                    ####else:
+                        ####n_start = i*self.b_s
+                        ####n_end = (i+1)*self.b_s
+                        ####imgs = cropped_lines[n_start:n_end]
+                    ####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                    ####generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                    ####generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                    
+                    ####extracted_texts = extracted_texts + generated_text_merged
+                    
                 del cropped_lines
                 gc.collect()
 
@@ -5288,31 +5478,71 @@ class Eynollah_ocr:
 
                 #print(time.time() - t0 ,'elapsed time')
 
-
                 indexer = 0
                 indexer_textregion = 0
                 for nn in root1.iter(region_tags):
-                    text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
-                    unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
+                    #id_textregion = nn.attrib['id']
+                    #id_textregions.append(id_textregion)
+                    #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
+                    
+                    is_textregion_text = False
+                    for childtest in nn:
+                        if childtest.tag.endswith("TextEquiv"):
+                            is_textregion_text = True
+                    
+                    if not is_textregion_text:
+                        text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
+                        unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
 
                     
                     has_textline = False
                     for child_textregion in nn:
                         if child_textregion.tag.endswith("TextLine"):
-                            text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
-                            unicode_textline = ET.SubElement(text_subelement, 'Unicode')
-                            unicode_textline.text = extracted_texts_merged[indexer]
+                            
+                            is_textline_text = False
+                            for childtest2 in child_textregion:
+                                if childtest2.tag.endswith("TextEquiv"):
+                                    is_textline_text = True
+                            
+                            
+                            if not is_textline_text:
+                                text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
+                                ##text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
+                                unicode_textline = ET.SubElement(text_subelement, 'Unicode')
+                                unicode_textline.text = extracted_texts_merged[indexer]
+                            else:
+                                for childtest3 in child_textregion:
+                                    if childtest3.tag.endswith("TextEquiv"):
+                                        for child_uc in childtest3:
+                                            if child_uc.tag.endswith("Unicode"):
+                                                ##childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
+                                                child_uc.text = extracted_texts_merged[indexer]
+                                    
                             indexer = indexer + 1
                             has_textline = True
                     if has_textline:
-                        unicode_textregion.text = text_by_textregion[indexer_textregion]
+                        if is_textregion_text:
+                            for child4 in nn:
+                                if child4.tag.endswith("TextEquiv"):
+                                    for childtr_uc in child4:
+                                        if childtr_uc.tag.endswith("Unicode"):
+                                            childtr_uc.text = text_by_textregion[indexer_textregion]
+                        else:
+                            unicode_textregion.text = text_by_textregion[indexer_textregion]
                         indexer_textregion = indexer_textregion + 1
                         
-
-
+                ###sample_order  = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order]
+                
+                ##ordered_texts_sample = [text for _, text in sorted(sample_order)]
+                ##tot_page_text = ' '.join(ordered_texts_sample)
+                
+                ##for page_element in root1.iter(link+'Page'):
+                    ##text_page = ET.SubElement(page_element, 'TextEquiv')
+                    ##unicode_textpage = ET.SubElement(text_page, 'Unicode')
+                    ##unicode_textpage.text = tot_page_text
+                
                 ET.register_namespace("",name_space)
                 tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
-                #print("Job done in %.1fs", time.time() - t0)
         else:
             ###max_len = 280#512#280#512
             ###padding_token = 1500#299#1500#299
diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py
index 058ab83..ca86047 100644
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -1,4 +1,3 @@
-from typing import Tuple
 import time
 import math
 
@@ -299,17 +298,9 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
             x_end_with_child_without_mother,
             new_main_sep_y)
 
-def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
-    return (box[1], box[1] + box[3],
-            box[0], box[0] + box[2])
-
-def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]:
-    return (slice(box[1], box[1] + box[3]),
-            slice(box[0], box[0] + box[2]))
-
 def crop_image_inside_box(box, img_org_copy):
-    image_box = img_org_copy[box2slice(box)]
-    return image_box, box2rect(box)
+    image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
+    return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
 
 def otsu_copy_binary(img):
     img_r = np.zeros((img.shape[0], img.shape[1], 3))
@@ -860,8 +851,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
         all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1)
 
         percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels)
-        
-        if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.7 and
+        if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and
             percent_text_to_all_in_drop >= 0.3):
             layout_in_patch[box0] = drop_capital_label
         else:
@@ -965,11 +955,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom,
                                                          regions_model_full.shape[0] // zoom),
                                     interpolation=cv2.INTER_NEAREST)
-    contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent]
+    contours_only_text_parent = [(i / zoom).astype(int) for i in  contours_only_text_parent]
 
     ###
     cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
-        find_new_features_of_contours(contours_only_text_parent_z)
+        find_new_features_of_contours(contours_only_text_parent)
 
     length_con=x_max_main-x_min_main
     height_con=y_max_main-y_min_main
@@ -992,7 +982,8 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     contours_only_text_parent_main_d=[]
     contours_only_text_parent_head_d=[]
 
-    for ii, con in enumerate(contours_only_text_parent_z):
+    for ii in range(len(contours_only_text_parent)):
+        con=contours_only_text_parent[ii]
         img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3))
         img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))
 
@@ -1003,22 +994,23 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
 
         if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ):
             regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
-            contours_only_text_parent_head.append(contours_only_text_parent[ii])
-            conf_contours_head.append(None) # why not conf_contours[ii], too?
+            contours_only_text_parent_head.append(con)
             if contours_only_text_parent_d_ordered is not None:
                 contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
             all_box_coord_head.append(all_box_coord[ii])
             slopes_head.append(slopes[ii])
             all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
+            conf_contours_head.append(None)
         else:
             regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
-            contours_only_text_parent_main.append(contours_only_text_parent[ii])
+            contours_only_text_parent_main.append(con)
             conf_contours_main.append(conf_contours[ii])
             if contours_only_text_parent_d_ordered is not None:
                 contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
             all_box_coord_main.append(all_box_coord[ii])
             slopes_main.append(slopes[ii])
             all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
+
         #print(all_pixels,pixels_main,pixels_header)
 
     ### to make it faster
@@ -1026,6 +1018,8 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
     #                                       regions_model_full.shape[0] // zoom),
     #                                 interpolation=cv2.INTER_NEAREST)
+    contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
+    contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
     ###
 
     return (regions_model_1,
@@ -1748,7 +1742,6 @@ def return_boxes_of_images_by_order_of_reading_new(
             x_ending = np.array(x_ending)
             y_type_2 = np.array(y_type_2)
             y_diff_type_2 = np.array(y_diff_type_2)
-            all_columns = set(range(len(peaks_neg_tot) - 1))
 
             if ((reading_order_type==1) or
                 (reading_order_type==0 and
@@ -1870,7 +1863,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                             x_end_by_order.append(len(peaks_neg_tot)-2)
                         else:
                             #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                            columns_covered_by_mothers = set()
+                            columns_covered_by_mothers = []
                             for dj in range(len(x_start_without_mother)):
                                 columns_covered_by_mothers = columns_covered_by_mothers + \
                                     list(range(int(x_start_without_mother[dj]),
@@ -1882,7 +1875,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                             y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
                             ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
                             ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                            x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                            x_starting = np.append(x_starting, columns_not_covered)
                             x_starting = np.append(x_starting, x_start_without_mother)
                             x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
                             x_ending = np.append(x_ending, x_end_without_mother)
@@ -1913,7 +1906,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 x_end_by_order.append(x_end_column_sort[ii]-1)
                     else:
                         #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                        columns_covered_by_mothers = set()
+                        columns_covered_by_mothers = []
                         for dj in range(len(x_start_without_mother)):
                             columns_covered_by_mothers = columns_covered_by_mothers + \
                                 list(range(int(x_start_without_mother[dj]),
@@ -1925,12 +1918,12 @@ def return_boxes_of_images_by_order_of_reading_new(
                         y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
                         ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
                         ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                        x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                        x_starting = np.append(x_starting, columns_not_covered)
                         x_starting = np.append(x_starting, x_start_without_mother)
-                        x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
+                        x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
                         x_ending = np.append(x_ending, x_end_without_mother)
 
-                        columns_covered_by_with_child_no_mothers = set()
+                        columns_covered_by_with_child_no_mothers = []
                         for dj in range(len(x_end_with_child_without_mother)):
                             columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \
                                 list(range(int(x_start_with_child_without_mother[dj]),
@@ -1974,7 +1967,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                                     if len(x_diff_all_between_nm_wc)>0:
                                         biggest=np.argmax(x_diff_all_between_nm_wc)
 
-                                    columns_covered_by_mothers = set()
+                                    columns_covered_by_mothers = []
                                     for dj in range(len(x_starting_all_between_nm_wc)):
                                         columns_covered_by_mothers = columns_covered_by_mothers + \
                                             list(range(int(x_starting_all_between_nm_wc[dj]),
@@ -2099,7 +2092,8 @@ def return_boxes_of_images_by_order_of_reading_new(
                 x_start_by_order=[]
                 x_end_by_order=[]
                 if len(x_starting)>0:
-                    columns_covered_by_lines_covered_more_than_2col = set()
+                    all_columns = np.arange(len(peaks_neg_tot)-1)
+                    columns_covered_by_lines_covered_more_than_2col = []
                     for dj in range(len(x_starting)):
                         if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns):
                             pass
@@ -2112,21 +2106,22 @@ def return_boxes_of_images_by_order_of_reading_new(
                     y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
                     ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
                     ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                    x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
-                    x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
+                    x_starting = np.append(x_starting, columns_not_covered)
+                    x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
                     if len(new_main_sep_y) > 0:
                         x_starting = np.append(x_starting, 0)
-                        x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
+                        x_ending = np.append(x_ending, len(peaks_neg_tot)-1)
                     else:
                         x_starting = np.append(x_starting, x_starting[0])
                         x_ending = np.append(x_ending, x_ending[0])
                 else:
-                    columns_not_covered = list(all_columns)
+                    all_columns = np.arange(len(peaks_neg_tot)-1)
+                    columns_not_covered = list(set(all_columns))
                     y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
                     ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
                     ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                    x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
-                    x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
+                    x_starting = np.append(x_starting, columns_not_covered)
+                    x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
 
                 ind_args=np.array(range(len(y_type_2)))
                 
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index ee2faa7..0e84153 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -1,15 +1,7 @@
-from typing import Sequence, Union
-from numbers import Number
 from functools import partial
-import itertools
-
 import cv2
 import numpy as np
-from scipy.sparse.csgraph import minimum_spanning_tree
-from shapely.geometry import Polygon, LineString
-from shapely.geometry.polygon import orient
-from shapely import set_precision
-from shapely.ops import unary_union, nearest_points
+from shapely import geometry
 
 from .rotate import rotate_image, rotation_image_new
 
@@ -45,28 +37,29 @@ def get_text_region_boxes_by_given_contours(contours):
 
     return boxes, contours_new
 
-def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
+def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area):
     found_polygons_early = []
-    for jv, contour in enumerate(contours):
-        if len(contour) < 3:  # A polygon cannot have less than 3 points
+    for jv,c in enumerate(contours):
+        if len(c) < 3:  # A polygon cannot have less than 3 points
             continue
 
-        polygon = contour2polygon(contour, dilate=dilate)
+        polygon = geometry.Polygon([point[0] for point in c])
         area = polygon.area
         if (area >= min_area * np.prod(image.shape[:2]) and
             area <= max_area * np.prod(image.shape[:2]) and
             hierarchy[0][jv][3] == -1):
-            found_polygons_early.append(polygon2contour(polygon))
+            # int32 is the point dtype OpenCV itself produces for contours (same as the tables variant)
+            found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
     return found_polygons_early
 
-def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
+def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
     found_polygons_early = []
-    for jv, contour in enumerate(contours):
-        if len(contour) < 3:  # A polygon cannot have less than 3 points
+    for jv,c in enumerate(contours):
+        if len(c) < 3:  # A polygon cannot have less than 3 points
             continue
 
-        polygon = contour2polygon(contour, dilate=dilate)
-        # area = cv2.contourArea(contour)
+        polygon = geometry.Polygon([point[0] for point in c])
+        # area = cv2.contourArea(c)
         area = polygon.area
         ##print(np.prod(thresh.shape[:2]))
         # Check that polygon has area greater than minimal area
@@ -75,8 +68,9 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.
             area <= max_area * np.prod(image.shape[:2]) and
             # hierarchy[0][jv][3]==-1
             True):
-            # print(contour[0][0][1])
-            found_polygons_early.append(polygon2contour(polygon))
+            # print(c[0][0][1])
+            found_polygons_early.append(np.array([[point]
+                                                  for point in polygon.exterior.coords], dtype=np.int32))
     return found_polygons_early
 
 def find_new_features_of_contours(contours_main):
@@ -141,12 +135,12 @@ def return_parent_contours(contours, hierarchy):
                        if hierarchy[0][i][3] == -1]
     return contours_parent
 
-def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
+def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
     # pixels of images are identified by 5
     if len(region_pre_p.shape) == 3:
-        cnts_images = (region_pre_p[:, :, 0] == label) * 1
+        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
     else:
-        cnts_images = (region_pre_p[:, :] == label) * 1
+        cnts_images = (region_pre_p[:, :] == pixel) * 1
     cnts_images = cnts_images.astype(np.uint8)
     cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
     imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -253,23 +247,30 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
         cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
     return cont_int[0], index_r_con, confidence_contour
 
-def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix, map=map):
+def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map):
     if not len(cnts):
         return [], []
+    
+    confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
+    img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
+    # Work at 1/6 resolution: the image and the confidence matrix were shrunk above, the contours are
+    # shrunk below; the contours coming back from the back-rotation are multiplied by 6 on return.
+    cnts = [(i/6).astype(int) for i in cnts]
+    results = map(partial(do_back_rotation_and_get_cnt_back,
+                          img=img,
+                          slope_first=slope_first,
+                          confidence_matrix=confidence_matrix,
+                          ),
+                  cnts, range(len(cnts)))
+    contours, indexes, conf_contours = tuple(zip(*results))
+    return [i*6 for i in contours], list(conf_contours)
 
-    confs = []
-    for cnt in cnts:
-        cnt_mask = np.zeros(confidence_matrix.shape)
-        cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt], color=1.0)
-        confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
-    return cnts, confs
-
-def return_contours_of_interested_textline(region_pre_p, label):
+def return_contours_of_interested_textline(region_pre_p, pixel):
     # pixels of images are identified by 5
     if len(region_pre_p.shape) == 3:
-        cnts_images = (region_pre_p[:, :, 0] == label) * 1
+        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
     else:
-        cnts_images = (region_pre_p[:, :] == label) * 1
+        cnts_images = (region_pre_p[:, :] == pixel) * 1
     cnts_images = cnts_images.astype(np.uint8)
     cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
     imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -292,12 +293,12 @@ def return_contours_of_image(image):
     contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
     return contours, hierarchy
 
-def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003):
+def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
     # pixels of images are identified by 5
     if len(region_pre_p.shape) == 3:
-        cnts_images = (region_pre_p[:, :, 0] == label) * 1
+        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
     else:
-        cnts_images = (region_pre_p[:, :] == label) * 1
+        cnts_images = (region_pre_p[:, :] == pixel) * 1
     cnts_images = cnts_images.astype(np.uint8)
     cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
     imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -310,12 +311,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_si
 
     return contours_imgs
 
-def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area):
+def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
     # pixels of images are identified by 5
     if len(region_pre_p.shape) == 3:
-        cnts_images = (region_pre_p[:, :, 0] == label) * 1
+        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
     else:
-        cnts_images = (region_pre_p[:, :] == label) * 1
+        cnts_images = (region_pre_p[:, :] == pixel) * 1
     cnts_images = cnts_images.astype(np.uint8)
     cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
     imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -331,97 +332,3 @@ def return_contours_of_interested_region_by_size(region_pre_p, label, min_area,
 
     return img_ret[:, :, 0]
 
-def dilate_textline_contours(all_found_textline_polygons):
-    return [[polygon2contour(contour2polygon(contour, dilate=6))
-             for contour in region]
-            for region in all_found_textline_polygons]
-
-def dilate_textregion_contours(all_found_textline_polygons):
-    return [polygon2contour(contour2polygon(contour, dilate=6))
-            for contour in all_found_textline_polygons]
-
-def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
-    polygon = Polygon([point[0] for point in contour])
-    if dilate:
-        polygon = polygon.buffer(dilate)
-    if polygon.geom_type == 'GeometryCollection':
-        # heterogeneous result: filter zero-area shapes (LineString, Point)
-        polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0])
-    if polygon.geom_type == 'MultiPolygon':
-        # homogeneous result: construct convex hull to connect
-        polygon = join_polygons(polygon.geoms)
-    return make_valid(polygon)
-
-def polygon2contour(polygon: Polygon) -> np.ndarray:
-    polygon = np.array(polygon.exterior.coords[:-1], dtype=int)
-    return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis]
-
-def make_valid(polygon: Polygon) -> Polygon:
-    """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
-    def isint(x):
-        return isinstance(x, int) or int(x) == x
-    # make sure rounding does not invalidate
-    if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0:
-        polygon = Polygon(np.round(polygon.exterior.coords))
-    points = list(polygon.exterior.coords[:-1])
-    # try by re-arranging points
-    for split in range(1, len(points)):
-        if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
-            break
-        # simplification may not be possible (at all) due to ordering
-        # in that case, try another starting point
-        polygon = Polygon(points[-split:]+points[:-split])
-    # try by simplification
-    for tolerance in range(int(polygon.area + 1.5)):
-        if polygon.is_valid:
-            break
-        # simplification may require a larger tolerance
-        polygon = polygon.simplify(tolerance + 1)
-    # try by enlarging
-    for tolerance in range(1, int(polygon.area + 2.5)):
-        if polygon.is_valid:
-            break
-        # enlargement may require a larger tolerance
-        polygon = polygon.buffer(tolerance)
-    assert polygon.is_valid, polygon.wkt
-    return polygon
-
-def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon:
-    """construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
-    # ensure input polygons are simply typed and all oriented equally
-    polygons = [orient(poly)
-                for poly in itertools.chain.from_iterable(
-                        [poly.geoms
-                         if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
-                         else [poly]
-                         for poly in polygons])]
-    npoly = len(polygons)
-    if npoly == 1:
-        return polygons[0]
-    # find min-dist path through all polygons (travelling salesman)
-    pairs = itertools.combinations(range(npoly), 2)
-    dists = np.zeros((npoly, npoly), dtype=float)
-    for i, j in pairs:
-        dist = polygons[i].distance(polygons[j])
-        if dist < 1e-5:
-            dist = 1e-5 # if pair merely touches, we still need to get an edge
-        dists[i, j] = dist
-        dists[j, i] = dist
-    dists = minimum_spanning_tree(dists, overwrite=True)
-    # add bridge polygons (where necessary)
-    for prevp, nextp in zip(*dists.nonzero()):
-        prevp = polygons[prevp]
-        nextp = polygons[nextp]
-        nearest = nearest_points(prevp, nextp)
-        bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1)
-        polygons.append(bridgep)
-    jointp = unary_union(polygons)
-    assert jointp.geom_type == 'Polygon', jointp.wkt
-    # follow-up calculations will necessarily be integer;
-    # so anticipate rounding here and then ensure validity
-    jointp2 = set_precision(jointp, 1.0)
-    if jointp2.geom_type != 'Polygon' or not jointp2.is_valid:
-        jointp2 = Polygon(np.round(jointp.exterior.coords))
-        jointp2 = make_valid(jointp2)
-    assert jointp2.geom_type == 'Polygon', jointp2.wkt
-    return jointp2
diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py
index 595cd14..ac8dc1d 100644
--- a/src/eynollah/utils/marginals.py
+++ b/src/eynollah/utils/marginals.py
@@ -99,8 +99,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
         except:
             point_left=first_nonzero
 
-        if point_left == first_nonzero and point_right == last_nonzero:
-            return text_regions
 
 
         if point_right>=mask_marginals.shape[1]:
diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index dd37b89..ead5cfb 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -20,8 +20,6 @@ from .contour import (
 from . import (
     find_num_col_deskew,
     crop_image_inside_box,
-    box2rect,
-    box2slice,
 )
 
 def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@@ -1349,26 +1347,24 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
 
     return contours_rotated_clean
 
-def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None):
+def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None):
     if logger is None:
         logger = getLogger(__package__)
-    if not np.prod(img_crop.shape):
-        return img_crop
 
     if num_col == 1:
-        num_patches = int(img_crop.shape[1] / 200.0)
+        num_patches = int(img_path.shape[1] / 200.0)
     else:
-        num_patches = int(img_crop.shape[1] / 140.0)
-    # num_patches=int(img_crop.shape[1]/200.)
+        num_patches = int(img_path.shape[1] / 140.0)
+    # num_patches=int(img_path.shape[1]/200.)
     if num_patches == 0:
         num_patches = 1
 
-    img_patch_interest = img_crop[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
+    img_patch_ineterst = img_path[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
 
-    # plt.imshow(img_patch_interest)
+    # plt.imshow(img_patch_ineterst)
     # plt.show()
 
-    length_x = int(img_crop.shape[1] / float(num_patches))
+    length_x = int(img_path.shape[1] / float(num_patches))
     # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2
     margin = int(0.04 * length_x)
     # if margin<=4:
@@ -1376,7 +1372,7 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
     # margin=0
 
     width_mid = length_x - 2 * margin
-    nxf = img_crop.shape[1] / float(width_mid)
+    nxf = img_path.shape[1] / float(width_mid)
 
     if nxf > int(nxf):
         nxf = int(nxf) + 1
@@ -1392,12 +1388,12 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
             index_x_d = i * width_mid
             index_x_u = index_x_d + length_x
 
-        if index_x_u > img_crop.shape[1]:
-            index_x_u = img_crop.shape[1]
-            index_x_d = img_crop.shape[1] - length_x
+        if index_x_u > img_path.shape[1]:
+            index_x_u = img_path.shape[1]
+            index_x_d = img_path.shape[1] - length_x
 
         # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
-        img_xline = img_patch_interest[:, index_x_d:index_x_u]
+        img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
 
         try:
             assert img_xline.any()
@@ -1413,9 +1409,9 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
         img_line_rotated = rotate_image(img_xline, slope_xline)
         img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
         
-    img_patch_interest = img_crop[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
+    img_patch_ineterst = img_path[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
 
-    img_patch_interest_revised = np.zeros(img_patch_interest.shape)
+    img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape)
 
     for i in range(nxf):
         if i == 0:
@@ -1425,11 +1421,11 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
             index_x_d = i * width_mid
             index_x_u = index_x_d + length_x
 
-        if index_x_u > img_crop.shape[1]:
-            index_x_u = img_crop.shape[1]
-            index_x_d = img_crop.shape[1] - length_x
+        if index_x_u > img_path.shape[1]:
+            index_x_u = img_path.shape[1]
+            index_x_d = img_path.shape[1] - length_x
 
-        img_xline = img_patch_interest[:, index_x_d:index_x_u]
+        img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
 
         img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
         img_int[:, :] = img_xline[:, :]  # img_patch_org[:,:,0]
@@ -1452,9 +1448,9 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
             int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]]
 
         img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
-        img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
+        img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
 
-    return img_patch_interest_revised
+    return img_patch_ineterst_revised
 
 def do_image_rotation(angle, img, sigma_des, logger=None):
     if logger is None:
@@ -1635,7 +1631,7 @@ def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None):
 
 def do_work_of_slopes_new(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, slope_deskew,
+        textline_mask_tot_ea, image_page_rotated, slope_deskew,
         logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
 ):
     if KERNEL is None:
@@ -1645,7 +1641,7 @@ def do_work_of_slopes_new(
     logger.debug('enter do_work_of_slopes_new')
 
     x, y, w, h = box_text
-    crop_coor = box2rect(box_text)
+    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
     mask_textline = np.zeros(textline_mask_tot_ea.shape)
     mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
     all_text_region_raw = textline_mask_tot_ea * mask_textline
@@ -1653,7 +1649,7 @@ def do_work_of_slopes_new(
     img_int_p = all_text_region_raw[:,:]
     img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2)
 
-    if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
+    if img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
         slope = 0
         slope_for_all = slope_deskew
         all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w]
@@ -1693,7 +1689,7 @@ def do_work_of_slopes_new(
 
 def do_work_of_slopes_new_curved(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, mask_texts_only, num_col, scale_par, slope_deskew,
+        textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew,
         logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
 ):
     if KERNEL is None:
@@ -1710,7 +1706,7 @@ def do_work_of_slopes_new_curved(
     # plt.imshow(img_int_p)
     # plt.show()
 
-    if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
+    if img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
         slope = 0
         slope_for_all = slope_deskew
     else:
@@ -1736,7 +1732,7 @@ def do_work_of_slopes_new_curved(
             slope_for_all = slope_deskew
         slope = slope_for_all
 
-    crop_coor = box2rect(box_text)
+    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
 
     if abs(slope_for_all) < 45:
         textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
@@ -1782,7 +1778,7 @@ def do_work_of_slopes_new_curved(
 
 def do_work_of_slopes_new_light(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, slope_deskew, textline_light,
+        textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light,
         logger=None
 ):
     if logger is None:
@@ -1790,7 +1786,7 @@ def do_work_of_slopes_new_light(
     logger.debug('enter do_work_of_slopes_new_light')
 
     x, y, w, h = box_text
-    crop_coor = box2rect(box_text)
+    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
     mask_textline = np.zeros(textline_mask_tot_ea.shape)
     mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
     all_text_region_raw = textline_mask_tot_ea * mask_textline
diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py
index 936c95f..2f9caf3 100644
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@@ -289,7 +289,7 @@ class EynollahXmlWriter():
 
         self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
         for mm in range(len(found_polygons_text_region_h)):
-            textregion = TextRegionType(id=counter.next_region_id, type_='heading',
+            textregion = TextRegionType(id=counter.next_region_id, type_='header',
                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
             page.add_TextRegion(textregion)
 
@@ -335,7 +335,7 @@ class EynollahXmlWriter():
             page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
 
         for mm in range(len(polygons_lines_to_be_written_in_xml)):
-            page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
+            page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0, 0, 0, 0]))))
 
         for mm in range(len(found_polygons_tables)):
             page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))