From 33c055389da22d99eb7082cc842f0b4d669d07ff Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 29 Apr 2026 01:52:29 +0200
Subject: [PATCH] =?UTF-8?q?bold=20`run=5Fsingle`=20refactoring=20(predict?=
 =?UTF-8?q?=20segmentation=20on=20cropped=20img)=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- move `extract_page()` to the start (right after enhancement),
  so early layout and textline model prediction sees cropped
  image
- `extract_page()`: also return page mask
- `get_early_layout()`:
  * use cropped image
  * also run optional table prediction here,
    map table label and confidence already
    (so no need to pass these arrays everywhere)
  * suppress all non-text type regions in textline mask
  * also return text+table mask
    (so no need to reconstruct it everywhere)
- apply page mask to textline mask and early layout result
  (i.e. suppress areas beyond border contour)
- `run_graphics_and_columns()`:
  * rename → `run_columns()`
  * no table prediction here
  * no page extraction here
  * no page cropping+masking here
  * no textline mask suppression here
- `run_graphics_and_columns_without_layout()`: drop
  (not needed anymore)
- `run_marginals()` vs. `get_marginals()`: extract
  `text_mask` internally from early layout
- early page cropping for col-classifier:
  also use cropped image in input binarization mode
- early page cropping for col-classifier:
  get external contours instead of indiscriminate tree
- writer: skip layout mode now also uses cropped coordinates
  (so drop kwarg for it)
---
 src/eynollah/eynollah.py        | 287 ++++++++++++--------------------
 src/eynollah/utils/marginals.py |   5 +-
 src/eynollah/writer.py          |   8 +-
 3 files changed, 116 insertions(+), 184 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 7f5c504..8958071 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -314,10 +314,10 @@ class Eynollah:
         img = self.imread(image, binary=self.input_binary)
 
         width_early = img.shape[1]
-        _, page_coord = self.early_page_for_num_of_column_classification(image)
+        page_img, page_coord = self.early_page_for_num_of_column_classification(img)
 
         if self.input_binary:
-            img_in = img
+            img_in = page_img
         else:
             img_1ch = self.imread(image, grayscale=True, uint8=False)
             img_1ch = img_1ch[page_coord[0]: page_coord[1],
@@ -365,7 +365,7 @@ class Eynollah:
 
         width_early = img.shape[1]
         t1 = time.time()
-        _, page_coord = self.early_page_for_num_of_column_classification(image)
+        page_img, page_coord = self.early_page_for_num_of_column_classification(img)
 
         label_p_pred = np.ones(6)
         conf_col = 1.0
@@ -376,7 +376,7 @@ class Eynollah:
         elif (not self.num_col_upper and not self.num_col_lower or
               self.num_col_upper != self.num_col_lower):
             if self.input_binary:
-                img_in = img
+                img_in = page_img
             else:
                 img_1ch = self.imread(image, grayscale=True)
                 img_1ch = img_1ch[page_coord[0]: page_coord[1],
@@ -856,6 +856,7 @@ class Eynollah:
                                [[w, 0]],
                                [[w, h]],
                                [[0, h]]])]
+        mask_page = np.ones((h, w), dtype=np.uint8)
         if not self.ignore_page_extraction:
             self.logger.debug("enter extract_page")
             #cv2.GaussianBlur(img, (5, 5), 0)
@@ -878,21 +879,22 @@ class Eynollah:
                     #h = h + (self.image.shape[0] - (y + h))
                 box = [x, y, w, h]
                 cropped_page, page_coord = crop_image_inside_box(box, img)
-            self.logger.debug("exit extract_page")
-        return cropped_page, page_coord, cont_page
+                mask_page = np.zeros((h, w), dtype=np.uint8)
+                mask_page = cv2.fillPoly(mask_page, pts=[cnt - [x, y]], color=1)
 
-    def early_page_for_num_of_column_classification(self, image):
-        img = self.imread(image, binary=self.input_binary)
+            self.logger.debug("exit extract_page")
+        return page_coord, cont_page, cropped_page, mask_page
+
+    def early_page_for_num_of_column_classification(self, img):
         if not self.ignore_page_extraction:
             self.logger.debug("enter early_page_for_num_of_column_classification")
-            img = cv2.GaussianBlur(img, (5, 5), 0)
-            prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
+            img2 = cv2.GaussianBlur(img, (5, 5), 0)
+            prediction = self.do_prediction(False, img2, self.model_zoo.get("page"))
             prediction = cv2.dilate(prediction, KERNEL, iterations=3)
-            contours, _ = cv2.findContours(prediction, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-            if len(contours)>0:
-                cnt_size = np.array([cv2.contourArea(contours[j])
-                                     for j in range(len(contours))])
-                cnt = contours[np.argmax(cnt_size)]
+            contours, _ = cv2.findContours(prediction, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            if len(contours):
+                areas = np.array(list(map(cv2.contourArea, contours)))
+                cnt = contours[np.argmax(areas)]
                 box = cv2.boundingRect(cnt)
             else:
                 box = [0, 0, img.shape[1], img.shape[0]]
@@ -1060,21 +1062,18 @@ class Eynollah:
         return result, conf_textline
 
     def get_early_layout(
-            self, image,
+            self, img,
             num_col_classifier,
             label_text=1,
             label_imgs=2,
             label_seps=3,
+            label_tabs=10,
     ):
         self.logger.debug("enter get_early_layout")
         t_in = time.time()
         erosion_hurts = False
-        img = image['img_res']
-        img_height_h = img.shape[0]
-        img_width_h = img.shape[1]
-        img_org = image['img']
-        img_height_org = img_org.shape[0]
-        img_width_org = img_org.shape[1]
+        # already cropped
+        img_height_h, img_width_h = img.shape[:2]
 
         if num_col_classifier == 1:
             img_w_new = 1000
@@ -1088,7 +1087,7 @@ class Eynollah:
             img_w_new = 3000
         else:
             img_w_new = 4000
-        img_h_new = img_w_new * img.shape[0] // img.shape[1]
+        img_h_new = img_w_new * img_height_h // img_width_h
         img_resized = resize_image(img, img_h_new, img_w_new)
         self.logger.debug("detecting textlines on %s with %d colors",
                           str(img_resized.shape), len(np.unique(img_resized)))
@@ -1096,12 +1095,10 @@ class Eynollah:
         textline_mask_tot_ea, confidence_textline = self.run_textline(img_resized)
         textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_height_h, img_width_h)
         confidence_textline = resize_image(confidence_textline, img_height_h, img_width_h)
-        if self.plotter:
-            self.plotter.save_plot_of_textlines(textline_mask_tot_ea, img_resized, image['name'])
 
         if self.skip_layout_and_reading_order:
             self.logger.debug("exit get_early_layout")
-            return None, erosion_hurts, None, None, textline_mask_tot_ea, None, None
+            return erosion_hurts, None, None, None, None, textline_mask_tot_ea, None, None
 
         #print("inside 2 ", time.time()-t_in)
         if num_col_classifier == 1 or num_col_classifier == 2:
@@ -1110,15 +1107,13 @@ class Eynollah:
             else:
                 patches = False
             self.logger.debug("resized to %dx%d for %d cols",
-                              img_resized.shape[1], img_resized.shape[0],
-                              num_col_classifier)
+                              img_w_new, img_h_new, num_col_classifier)
         else:
             new_w = (900+ (num_col_classifier-3)*100)
-            new_h = new_w * img.shape[0] // img.shape[1]
+            new_h = new_w * img_height_h // img_width_h
             img_resized = resize_image(img_resized, new_h, new_w)
-            self.logger.debug("resized to %dx%d (new_w=%d) for %d cols",
-                              img_resized.shape[1], img_resized.shape[0],
-                              new_w, num_col_classifier)
+            self.logger.debug("resized to %dx%d for %d cols",
+                              new_w, new_h, num_col_classifier)
             patches = True
 
         prediction_regions, confidence_regions = \
@@ -1132,9 +1127,16 @@ class Eynollah:
         prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
         confidence_regions = resize_image(confidence_regions, img_height_h, img_width_h)
 
+        if self.tables:
+            prediction_tables, confidence_tables = self.get_tables_from_model(img)
+        else:
+            prediction_tables = np.zeros(img.shape[:2], dtype=np.uint8)
+            confidence_tables = np.zeros(img.shape[:2], dtype=bool)
+
         mask_texts_only = (prediction_regions == label_text).astype('uint8')
         mask_images_only = (prediction_regions == label_imgs).astype('uint8')
         mask_seps_only = (prediction_regions == label_seps).astype('uint8')
+        mask_tabs_only = prediction_tables
 
         ##if num_col_classifier == 1 or num_col_classifier == 2:
             ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1)
@@ -1148,21 +1150,30 @@ class Eynollah:
         polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
         ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts)
         polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001)
+        polygons_of_only_tabs = return_contours_of_interested_region(mask_tabs_only,1,0.00001)
 
         text_regions_p = np.zeros_like(prediction_regions)
         text_regions_p = cv2.fillPoly(text_regions_p, pts=polygons_of_only_seps, color=label_seps)
         text_regions_p[mask_images_only == 1] = label_imgs
         text_regions_p = cv2.fillPoly(text_regions_p, pts=polygons_of_only_texts, color=label_text)
+        text_regions_p = cv2.fillPoly(text_regions_p, pts=polygons_of_only_tabs, color=label_tabs)
 
-        textline_mask_tot_ea[text_regions_p == 0] = 0
+        textline_mask_tot_ea[text_regions_p != label_text] = 0
+        confidence_textline[text_regions_p != label_text] = 0
+        confidence_regions[text_regions_p == label_tabs] = \
+            confidence_tables[text_regions_p == label_tabs]
+
+        regions_without_separators = ((text_regions_p == label_text) |
+                                      (text_regions_p == label_tabs)).astype(np.uint8)
         #plt.imshow(textline_mask_tot_ea)
         #plt.show()
         #print("inside 4 ", time.time()-t_in)
         self.logger.debug("exit get_early_layout")
-        return (text_regions_p,
-                erosion_hurts,
+        return (erosion_hurts,
                 polygons_seplines,
                 polygons_of_only_texts,
+                regions_without_separators,
+                text_regions_p,
                 textline_mask_tot_ea,
                 confidence_regions,
                 confidence_textline)
@@ -1494,65 +1505,22 @@ class Eynollah:
         table_prediction = table_prediction.astype(np.uint8)
         return table_prediction, table_confidence
 
-    def run_graphics_and_columns(
-            self, text_regions_p_1, textline_mask_tot_ea,
-            regions_confidence, textline_confidence,
+    def run_columns(
+            self, text_regions_p_1,
             num_col_classifier, num_column_is_classified,
-            erosion_hurts, image,
+            erosion_hurts,
             label_imgs=2,
             label_seps=3,
     ):
-        """detect page boundary and apply its mask/bbox, post-process column classifier result, optionally detect tables"""
-
+        """post-process column classifier result"""
         t_in_gr = time.time()
+        regions_without_separators = ((text_regions_p_1 != label_seps) &
+                                      (text_regions_p_1 != 0)).astype(np.uint8)
+        if not erosion_hurts:
+            regions_without_separators = cv2.erode(regions_without_separators, KERNEL, iterations=6)
 
-        image_page, page_coord, cont_page = self.extract_page(image)
-        if self.tables:
-            table_prediction, table_confidence = self.get_tables_from_model(image_page)
-        else:
-            table_prediction = np.zeros(image_page.shape[:2], dtype=np.uint8)
-            table_confidence = np.zeros(image_page.shape[:2], dtype=bool)
-
-        if self.plotter:
-            self.plotter.save_page_image(image_page, image['name'])
-
-        if not self.ignore_page_extraction:
-            mask_page = np.zeros_like(text_regions_p_1)
-            mask_page = cv2.fillPoly(mask_page, pts=[cont_page[0]], color=1)
-            mask_page = mask_page == 0
-
-            text_regions_p_1[mask_page] = 0
-            textline_mask_tot_ea[mask_page] = 0
-            regions_confidence[mask_page] = 0
-            textline_confidence[mask_page] = 0
-
-        box = slice(*page_coord[0:2]), slice(*page_coord[2:4])
-        text_regions_p_1 = text_regions_p_1[box]
-        textline_mask_tot_ea = textline_mask_tot_ea[box]
-        regions_confidence = regions_confidence[box]
-        textline_confidence = textline_confidence[box]
-        self.logger.debug("Cropped page is %dx%d", *text_regions_p_1.shape)
-
-        mask_images = (text_regions_p_1 == label_imgs).astype(np.uint8)
-        mask_images = cv2.erode(mask_images, KERNEL, iterations=10)
-        textline_mask_tot_ea[mask_images == 1] = 0
-        textline_confidence[mask_images == 1] = 0
-
-        img_only_regions_with_sep = ((text_regions_p_1 != label_seps) &
-                                     (text_regions_p_1 != 0)).astype(np.uint8)
-
-        #print("inside graphics 2 ", time.time() - t_in_gr)
-        if erosion_hurts:
-            img_only_regions = img_only_regions_with_sep
-        else:
-            img_only_regions = cv2.erode(img_only_regions_with_sep, KERNEL, iterations=6)
-
-        ##print(img_only_regions.shape,'img_only_regions')
-        ##plt.imshow(img_only_regions[:,:])
-        ##plt.show()
-        ##num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
         try:
-            num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
+            num_col, _ = find_num_col(regions_without_separators, num_col_classifier, self.tables, multiplier=6.0)
             num_col = num_col + 1
             if not num_column_is_classified:
                 num_col_classifier = num_col
@@ -1562,29 +1530,7 @@ class Eynollah:
         except Exception as why:
             self.logger.exception(why)
             num_col = None
-        return (num_col, num_col_classifier,
-                page_coord, image_page, cont_page,
-                text_regions_p_1,
-                table_prediction,
-                textline_mask_tot_ea,
-                regions_confidence,
-                table_confidence,
-                textline_confidence,
-        )
-
-    def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, image):
-        image_page, page_coord, cont_page = self.extract_page(image)
-
-        mask_page = np.zeros_like(textline_mask_tot_ea)
-        mask_page = cv2.fillPoly(mask_page, pts=[cont_page[0]], color=1)
-        mask_page = mask_page == 0
-
-        textline_mask_tot_ea[mask_page] = 0
-        box = slice(*page_coord[0:2]), slice(*page_coord[2:4])
-        textline_mask_tot_ea = textline_mask_tot_ea[box]
-
-        return page_coord, image_page, textline_mask_tot_ea, cont_page
-
+        return num_col, num_col_classifier
 
     def run_enhancement(self, image):
         t_in = time.time()
@@ -1624,21 +1570,14 @@ class Eynollah:
         self.logger.info("slope_deskew: %.2f°", slope_deskew)
         return slope_deskew
 
-    def run_marginals(
-            self, num_col_classifier, slope_deskew, text_regions_p, table_prediction):
-
-        regions_without_separators = (text_regions_p == 1).astype(np.uint8)
-        if self.tables:
-            regions_without_separators[table_prediction == 1] = 1
-
-        get_marginals(regions_without_separators, text_regions_p,
-                      num_col_classifier, slope_deskew, kernel=KERNEL)
+    def run_marginals(self, num_col_classifier, slope_deskew, text_regions_p):
+        get_marginals(num_col_classifier, slope_deskew, text_regions_p,
+                      kernel=KERNEL)
 
     def get_full_layout(
             self, image_page,
             text_regions_p,
             num_col_classifier,
-            table_prediction,
             label_text=1,
             label_imgs=2,
             label_imgs_fl=5,
@@ -1669,10 +1608,6 @@ class Eynollah:
         text_regions_p[text_regions_p == label_seps] = label_seps_fl
         text_regions_p[text_regions_p == label_marg] = label_marg_fl
 
-        regions_without_separators = (text_regions_p == label_text).astype(np.uint8)
-        # regions_without_separators = ( text_regions_p == 1 | text_regions_p == 2 ) * 1
-
-        image_page = image_page.astype(np.uint8)
         if self.full_layout:
             regions_fully, regionsfl_confidence = self.extract_text_regions_new(
                 image_page,
@@ -1689,20 +1624,13 @@ class Eynollah:
             regions_fully[drops] = label_drop_fl_model
             drops = fill_bb_of_drop_capitals(regions_fully, text_regions_p)
             text_regions_p[drops] = label_drop_fl
-
-            regions_without_separators[drops] = 1 # also cover in reading-order
         else:
             regions_fully = None,
             regionsfl_confidence = None
 
-        if self.tables:
-            text_regions_p[table_prediction == 1] = label_tabs
-            regions_without_separators[table_prediction == 1] = 1
-
         # no need to return text_regions_p (inplace editing)
         self.logger.debug('exit get_full_layout')
-        return (regions_fully, regionsfl_confidence,
-                regions_without_separators)
+        return regions_fully, regionsfl_confidence
 
     def get_deskewed_masks(
             self,
@@ -2200,22 +2128,26 @@ class Eynollah:
                          f"{image['dpi']} DPI, {num_col_classifier} columns")
         self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")
 
+        t1 = time.time()
+        page_coord, cont_page, image_page, mask_page = self.extract_page(image)
+        if not self.ignore_page_extraction:
+            self.logger.debug("Cropped page is %dx%d", image_page.shape[1], image_page.shape[0])
+            self.logger.info("Cropping took %.1fs", time.time() - t1)
+        if self.plotter:
+            self.plotter.save_page_image(image_page, image['name'])
+
         # Basic Processing Mode
         if self.skip_layout_and_reading_order:
             self.logger.info("Step 2/5: Basic Processing Mode")
             self.logger.info("Skipping layout analysis and reading order detection")
 
-            _ ,_, _, _, textline_mask_tot_ea, _, _ = \
-                self.get_early_layout(image, num_col_classifier)
+            _, _, _, _, _, textline_mask_tot_ea, _, _ = \
+                self.get_early_layout(image_page, num_col_classifier)
 
-            page_coord, image_page, textline_mask_tot_ea, cont_page = \
-                self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, image)
-
-            ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
-
-            cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
+            textline_mask_tot_ea *= mask_page
+            textline_cnt, textline_hir = return_contours_of_image(textline_mask_tot_ea)
             all_found_textline_polygons = filter_contours_area_of_image(
-                textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
+                textline_mask_tot_ea, textline_cnt, textline_hir, max_area=1, min_area=0.00001)
 
             cx_textlines, cy_textlines = find_center_of_contours(all_found_textline_polygons)
             w_h_textlines = [cv2.boundingRect(polygon)[2:]
@@ -2247,7 +2179,6 @@ class Eynollah:
                 cont_page=cont_page,
                 polygons_seplines=[],
                 conf_textregions=[0],
-                skip_layout_reading_order=True
             )
             self.logger.info("Basic processing complete")
             writer.write_pagexml(pcgts)
@@ -2257,15 +2188,21 @@ class Eynollah:
         t1 = time.time()
         self.logger.info("Step 2/5: Layout Analysis")
 
-        (text_regions_p,
-         erosion_hurts,
+        (erosion_hurts,
          polygons_seplines,
          polygons_text_early,
+         regions_without_separators,
+         text_regions_p,
          textline_mask_tot_ea,
          regions_confidence,
-         textline_confidence) = self.get_early_layout(image, num_col_classifier)
+         textline_confidence) = self.get_early_layout(image_page, num_col_classifier)
+        regions_without_separators *= mask_page
+        text_regions_p *= mask_page
+        textline_mask_tot_ea *= mask_page
         t2 = time.time()
-        self.logger.info("Eearly layout took %.1fs", t2 - t1)
+        self.logger.info("Early layout took %.1fs", t2 - t1)
+        if self.plotter:
+            self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page, image['name'])
 
         if num_col_classifier == 1 or num_col_classifier ==2:
             if num_col_classifier == 1:
@@ -2278,28 +2215,22 @@ class Eynollah:
             slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew)
         else:
             slope_deskew = self.run_deskew(textline_mask_tot_ea)
-        if self.plotter:
-            self.plotter.save_deskewed_image(slope_deskew, image['img'], image['name'])
-        t3 = time.time()
-        self.logger.info("Deskewing took %.1fs", t3 - t2)
-
-        (num_col, num_col_classifier,
-         page_coord, image_page, cont_page,
-         text_regions_p, table_prediction, textline_mask_tot_ea,
-         regions_confidence, table_confidence, textline_confidence) = \
-                self.run_graphics_and_columns(text_regions_p, textline_mask_tot_ea,
-                                              regions_confidence, textline_confidence,
-                                              num_col_classifier, num_column_is_classified,
-                                              erosion_hurts, image)
-        t4 = time.time()
-        self.logger.info("Cropping took %.1fs", t4 - t3)
-        textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
-
         # if ratio of text regions to page area is smaller that 30%,
         # then ignore skew angle above 45°
         if (abs(slope_deskew) > 45 and
             ((text_regions_p == label_text).sum()) <= 0.3 * image_page.size):
             slope_deskew = 0
+        if self.plotter:
+            self.plotter.save_deskewed_image(slope_deskew, image['img'], image['name'])
+        t3 = time.time()
+        self.logger.info("Deskewing took %.1fs", t3 - t2)
+
+        num_col, num_col_classifier = \
+            self.run_columns(text_regions_p,
+                             num_col_classifier, num_column_is_classified,
+                             erosion_hurts)
+        t4 = time.time()
+        textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
 
         if not num_col and len(polygons_text_early) == 0 or not image_page.size:
             self.logger.info("No columns detected - generating empty PAGE-XML")
@@ -2336,32 +2267,34 @@ class Eynollah:
             img_h_new = img_w_new * img_h_org // img_w_org
 
             text_regions_p_new = resize_image(text_regions_p, img_h_new, img_w_new)
-            table_prediction_new = resize_image(table_prediction, img_h_new, img_w_new)
-            self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_new, table_prediction_new)
+            self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_new)
             text_regions_p = resize_image(text_regions_p_new, img_h_org, img_w_org)
 
+            t5 = time.time()
+            self.logger.info("Marginalia extraction took %.1fs", t5 - t4)
+        else:
+            t5 = time.time()
+
         if self.plotter:
             self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name'])
             self.plotter.save_plot_of_layout_main(text_regions_p, image_page, image['name'])
-        t5 = time.time()
-        self.logger.info("Marginalia extraction took %.1fs", t5 - t4)
-        self.logger.info("Step 3/5: Text Line Detection")
 
-        regions_fully, regionsfl_confidence, regions_without_separators = \
-            self.get_full_layout(image_page,
-                                 text_regions_p,
-                                 num_col_classifier,
-                                 table_prediction)
+        regions_fully, regionsfl_confidence = \
+            self.get_full_layout(image_page, text_regions_p, num_col_classifier)
 
         if self.full_layout:
+            regions_without_separators[text_regions_p == label_drop_fl] = 1 # also cover in reading-order
             textline_mask_tot_ea_org[text_regions_p == label_drop_fl] = 0 # skip for textlines
             textline_mask_tot_ea[text_regions_p == label_drop_fl] = 1 # needed for reading order
             polygons_of_drop_capitals = return_contours_of_interested_region(text_regions_p,
                                                                              label_drop_fl,
                                                                              min_area=0.00003)
             conf_drops = get_region_confidences(polygons_of_drop_capitals, regionsfl_confidence)
-        t6 = time.time()
-        self.logger.info("Full layout took %.1fs", t6 - t5)
+            t6 = time.time()
+            self.logger.info("Full layout took %.1fs", t6 - t5)
+        else:
+            t6 = time.time()
+        self.logger.info("Step 3/5: Contour extraction")
 
         min_area_mar = 0.00001
         marginal_mask = (text_regions_p == label_marg_fl).astype(np.uint8)
@@ -2373,7 +2306,7 @@ class Eynollah:
         polygons_of_images = return_contours_of_interested_region(text_regions_p, label_imgs_fl)
         conf_marginals = get_region_confidences(polygons_of_marginals, regions_confidence)
         conf_images = get_region_confidences(polygons_of_images, regions_confidence)
-        conf_tables = get_region_confidences(polygons_of_tables, table_confidence)
+        conf_tables = get_region_confidences(polygons_of_tables, regions_confidence)
 
         polygons_of_textregions = return_contours_of_interested_region(text_regions_p, label_text,
                                                                        min_area=MIN_AREA_REGION)
@@ -2513,7 +2446,7 @@ class Eynollah:
                                                      image['scale_x'], image['scale_y'], image['name'])
 
         t_order = time.time()
-        self.logger.info("Step 4/5: Reading Order Detection")
+        self.logger.info("Step 4/5: Reading Order")
         if self.right2left:
             self.logger.info("Right-to-left mode enabled")
         if self.headers_off:
diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py
index 858821b..79c75d1 100644
--- a/src/eynollah/utils/marginals.py
+++ b/src/eynollah/utils/marginals.py
@@ -6,15 +6,18 @@ from .contour import find_center_of_contours, return_contours_of_interested_regi
 from .resize import resize_image
 from .rotate import rotate_image
 
-def get_marginals(text_mask, early_layout, num_col, slope_deskew,
+def get_marginals(num_col, slope_deskew, early_layout,
                   kernel=None,
                   label_text=1,
                   label_marg=4,
+                  label_tabs=10,
 ):
     if kernel is None:
         kernel = np.ones((5, 5), dtype=np.uint8)
     kernel_hor = np.ones((1, 5), dtype=np.uint8)
 
+    text_mask = ((early_layout == label_text) |
+                 (early_layout == label_tabs)).astype(np.uint8)
     text_mask_d = rotate_image(text_mask, slope_deskew)
     main_mask_d = np.zeros_like(text_mask_d)
     height, width = main_mask_d.shape
diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py
index a705a16..47fc32f 100644
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@@ -166,7 +166,6 @@ class EynollahXmlWriter:
         conf_images=None,
         conf_tables=None,
         conf_drops=None,
-        skip_layout_reading_order=False,
     ):
         self.logger.debug('enter build_pagexml')
 
@@ -180,10 +179,7 @@ class EynollahXmlWriter:
         if len(cont_page):
             page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_points(cont_page[0]))))
 
-        if skip_layout_reading_order:
-            offset = None
-        else:
-            offset = [page_coord[2], page_coord[0]]
+        offset = [page_coord[2], page_coord[0]]
         counter = EynollahIdCounter()
         if len(order_of_texts):
             _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
@@ -283,7 +279,7 @@ class EynollahXmlWriter:
         for region_contour in polygons_seplines:
             page.add_SeparatorRegion(
                 SeparatorRegionType(id=counter.next_region_id,
-                                    Coords=CoordsType(points=self.calculate_points(region_contour, None))))
+                                    Coords=CoordsType(points=self.calculate_points(region_contour, offset))))
 
         for mm, region_contour in enumerate(found_polygons_tables):
             table = TableRegionType(