Merge e428e7ad78 into 38c028c6b5

2025-11-17 01:44:14 +01:00 · 2025-11-16 15:36:06 +00:00 · 2025-11-16 15:36:06 +00:00 · 850221d9ea
commit 850221d9ea
parent 38c028c6b5 e428e7ad78
7 changed files with 1086 additions and 1414 deletions
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@ -79,18 +79,28 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
    type=click.Path(file_okay=True, dir_okay=True),
    required=True,
 )
+@click.option(
+    "--overwrite",
+    "-O",
+    help="overwrite (instead of skipping) if output xml exists",
+    is_flag=True,
+)
@click.option(
    "--log_level",
    "-l",
    type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
    help="Override log level globally to this",
 )
-def binarization(patches, model_dir, input_image, dir_in, output, log_level):
+def binarization(patches, model_dir, input_image, dir_in, output, overwrite, log_level):
    assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
    binarizer = SbbBinarizer(model_dir)
    if log_level:
-        binarizer.log.setLevel(getLevelName(log_level))
-    binarizer.run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in)
+        binarizer.logger.setLevel(getLevelName(log_level))
+    binarizer.run(overwrite=overwrite,
+                  use_patches=patches,
+                  image_path=input_image,
+                  output=output,
+                  dir_in=dir_in)


@main.command()
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@ -88,12 +88,7 @@ from .utils.contour import (
    join_polygons,
    make_intersection,
 )
-from .utils.rotate import (
-    rotate_image,
-    rotation_not_90_func,
-    rotation_not_90_func_full_layout,
-    rotation_image_new
-)
+from .utils.rotate import rotate_image
 from .utils.utils_ocr import (
    return_start_and_end_of_common_text_of_textline_ocr_without_common_section,
    return_textline_contour_with_added_box_coordinate,
@ -139,7 +134,6 @@ from .utils import (
    return_boxes_of_images_by_order_of_reading_new
 )
 from .utils.pil_cv2 import check_dpi, pil2cv
-from .utils.xml import order_and_id_of_texts
 from .plot import EynollahPlotter
 from .writer import EynollahXmlWriter

@ -2091,19 +2085,19 @@ class Eynollah:
        prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
        prediction_regions_org=prediction_regions_org[:,:,0]

-        mask_lines_only = (prediction_regions_org[:,:] ==3)*1
+        mask_seps_only = (prediction_regions_org[:,:] == 3)*1
        mask_texts_only = (prediction_regions_org[:,:] ==1)*1
        mask_images_only=(prediction_regions_org[:,:] ==2)*1

-        polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+        polygons_seplines, hir_seplines = return_contours_of_image(mask_seps_only)
        polygons_seplines = filter_contours_area_of_image(
-            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+            mask_seps_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)

        polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
-        polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
+        polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001)

        text_regions_p_true = np.zeros(prediction_regions_org.shape)
-        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3))
+        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_seps, color=(3,3,3))

        text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1))
@ -2282,7 +2276,7 @@ class Eynollah:
        img_bin = resize_image(img_bin, img_height_h, img_width_h )
        prediction_regions_org=prediction_regions_org[:,:,0]

-        mask_lines_only = (prediction_regions_org[:,:] ==3)*1
+        mask_seps_only = (prediction_regions_org[:,:] == 3)*1
        mask_texts_only = (prediction_regions_org[:,:] ==1)*1
        mask_texts_only = mask_texts_only.astype('uint8')

@ -2293,7 +2287,7 @@ class Eynollah:
        mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
        mask_images_only=(prediction_regions_org[:,:] ==2)*1

-        polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+        polygons_seplines, hir_seplines = return_contours_of_image(mask_seps_only)
        test_khat = np.zeros(prediction_regions_org.shape)
        test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1))

@ -2307,7 +2301,7 @@ class Eynollah:
            #plt.show()

        polygons_seplines = filter_contours_area_of_image(
-            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+            mask_seps_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)

        test_khat = np.zeros(prediction_regions_org.shape)
        test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1))
@ -2318,10 +2312,10 @@ class Eynollah:

        polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
        ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts)
-        polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
+        polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001)

        text_regions_p_true = np.zeros(prediction_regions_org.shape)
-        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3))
+        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_seps, color=(3,3,3))

        text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
@ -2377,7 +2371,7 @@ class Eynollah:
            prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h )

            mask_zeros2 = (prediction_regions_org2[:,:,0] == 0)
-            mask_lines2 = (prediction_regions_org2[:,:,0] == 3)
+            mask_seps2 = (prediction_regions_org2[:,:,0] == 3)
            text_sume_early = (prediction_regions_org[:,:] == 1).sum()
            prediction_regions_org_copy = np.copy(prediction_regions_org)
            prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0
@ -2388,8 +2382,8 @@ class Eynollah:
            if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD):
                prediction_regions_org = np.copy(prediction_regions_org_copy)

-            prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3
-            mask_lines_only=(prediction_regions_org[:,:]==3)*1
+            prediction_regions_org[(mask_seps2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3
+            mask_seps_only=(prediction_regions_org[:,:]==3)*1
            prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2)
            prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2)

@ -2411,20 +2405,20 @@ class Eynollah:
                prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
                prediction_regions_org=prediction_regions_org[:,:,0]

-                mask_lines_only=(prediction_regions_org[:,:]==3)*1
+                mask_seps_only=(prediction_regions_org[:,:]==3)*1

            mask_texts_only=(prediction_regions_org[:,:]==1)*1
            mask_images_only=(prediction_regions_org[:,:]==2)*1

-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+            polygons_seplines, hir_seplines = return_contours_of_image(mask_seps_only)
            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+                mask_seps_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)

            polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
-            polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)
+            polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only, 1, 0.00001)

            text_regions_p_true = np.zeros(prediction_regions_org.shape)
-            text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_lines, color=(3, 3, 3))
+            text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_seps, color=(3, 3, 3))
            text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2

            text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
@ -2449,7 +2443,7 @@ class Eynollah:
            prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
            prediction_regions_org=prediction_regions_org[:,:,0]

-            #mask_lines_only=(prediction_regions_org[:,:]==3)*1
+            #mask_seps_only=(prediction_regions_org[:,:]==3)*1
            #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))

            #prediction_regions_org = self.do_prediction(True, img, self.models["region"])
@ -2457,19 +2451,19 @@ class Eynollah:
            #prediction_regions_org = prediction_regions_org[:,:,0]
            #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0

-            mask_lines_only = (prediction_regions_org == 3)*1
+            mask_seps_only = (prediction_regions_org == 3)*1
            mask_texts_only = (prediction_regions_org == 1)*1
            mask_images_only= (prediction_regions_org == 2)*1

-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+            polygons_seplines, hir_seplines = return_contours_of_image(mask_seps_only)
            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+                mask_seps_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)

            polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
-            polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
+            polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001)

            text_regions_p_true = np.zeros(prediction_regions_org.shape)
-            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3))
+            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_seps, color=(3,3,3))

            text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
@ -2491,11 +2485,15 @@ class Eynollah:
            contours_only_text_parent)
        cx_head, cy_head, mx_head, Mx_head, my_head, My_head, mxy_head = find_new_features_of_contours(
            contours_only_text_parent_h)
+        cx_main = np.array(cx_main, dtype=int)
+        cy_main = np.array(cy_main, dtype=int)
+        cx_head = np.array(cx_head, dtype=int)
+        cy_head = np.array(cy_head, dtype=int)

        def match_boxes(only_centers: bool):
            arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int)
            for ii in range(len(contours_only_text_parent)):
-                check_if_textregion_located_in_a_box = False
+                box_found = False
                for jj, box in enumerate(boxes):
                    if ((cx_main[ii] >= box[0] and
                         cx_main[ii] < box[1] and
@ -2506,20 +2504,23 @@ class Eynollah:
                         my_main[ii] >= box[2] and
                         My_main[ii] < box[3])):
                        arg_text_con_main[ii] = jj
-                        check_if_textregion_located_in_a_box = True
+                        box_found = True
+                        # print("main/matched ", ii, "\t", (mx_main[ii], Mx_main[ii], my_main[ii], My_main[ii]), "\tin", jj, box, only_centers)
                        break
-                if not check_if_textregion_located_in_a_box:
+                if not box_found:
                    dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_main[ii]], [cx_main[ii]]]), axis=0)
                    pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
                                         (boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
+                    assert pcontained_in_box.any(), (ii, cx_main[ii], cy_main[ii])
                    ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
                    arg_text_con_main[ii] = ind_min
+                    # print("main/fallback ", ii, "\t", (mx_main[ii], Mx_main[ii], my_main[ii], My_main[ii]), "\tin", ind_min, boxes[ind_min], only_centers)
            args_contours_main = np.arange(len(contours_only_text_parent))
            order_by_con_main = np.zeros_like(arg_text_con_main)

            arg_text_con_head = np.zeros(len(contours_only_text_parent_h), dtype=int)
            for ii in range(len(contours_only_text_parent_h)):
-                check_if_textregion_located_in_a_box = False
+                box_found = False
                for jj, box in enumerate(boxes):
                    if ((cx_head[ii] >= box[0] and
                         cx_head[ii] < box[1] and
@ -2530,20 +2531,21 @@ class Eynollah:
                         my_head[ii] >= box[2] and
                         My_head[ii] < box[3])):
                        arg_text_con_head[ii] = jj
-                        check_if_textregion_located_in_a_box = True
+                        box_found = True
+                        # print("head/matched ", ii, "\t", (mx_head[ii], Mx_head[ii], my_head[ii], My_head[ii]), "\tin", jj, box, only_centers)
                        break
-                if not check_if_textregion_located_in_a_box:
+                if not box_found:
                    dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_head[ii]], [cx_head[ii]]]), axis=0)
                    pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) &
                                         (boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1]))
+                    assert pcontained_in_box.any(), (ii, cx_head[ii], cy_head[ii])
                    ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
                    arg_text_con_head[ii] = ind_min
+                    # print("head/fallback ", ii, "\t", (mx_head[ii], Mx_head[ii], my_head[ii], My_head[ii]), "\tin", ind_min, boxes[ind_min], only_centers)
            args_contours_head = np.arange(len(contours_only_text_parent_h))
            order_by_con_head = np.zeros_like(arg_text_con_head)

-            ref_point = 0
-            order_of_texts_tot = []
-            id_of_texts_tot = []
+            idx = 0
            for iij, box in enumerate(boxes):
                ys = slice(*box[2:4])
                xs = slice(*box[0:2])
@ -2552,42 +2554,30 @@ class Eynollah:
                con_inter_box = contours_only_text_parent[args_contours_box_main]
                con_inter_box_h = contours_only_text_parent_h[args_contours_box_head]

-                indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
-                    textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
+                _, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
+                    textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2], box[0])

-                order_of_texts, id_of_texts = order_and_id_of_texts(
-                    con_inter_box, con_inter_box_h,
-                    indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
+                for tidx, kind in zip(index_by_kind_sorted, kind_of_texts_sorted):
+                    if kind == 1:
+                        # print(iij, "main", args_contours_box_main[tidx], "becomes", idx)
+                        order_by_con_main[args_contours_box_main[tidx]] = idx
+                    else:
+                        # print(iij, "head", args_contours_box_head[tidx], "becomes", idx)
+                        order_by_con_head[args_contours_box_head[tidx]] = idx
+                    idx += 1

-                indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
-                indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
-                indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
-                indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
-
-                for zahler, _ in enumerate(args_contours_box_main):
-                    arg_order_v = indexes_sorted_main[zahler]
-                    order_by_con_main[args_contours_box_main[indexes_by_type_main[zahler]]] = \
-                        np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
-
-                for zahler, _ in enumerate(args_contours_box_head):
-                    arg_order_v = indexes_sorted_head[zahler]
-                    order_by_con_head[args_contours_box_head[indexes_by_type_head[zahler]]] = \
-                        np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
-
-                for jji in range(len(id_of_texts)):
-                    order_of_texts_tot.append(order_of_texts[jji] + ref_point)
-                    id_of_texts_tot.append(id_of_texts[jji])
-                ref_point += len(id_of_texts)
-
-            order_of_texts_tot = np.concatenate((order_by_con_main,
-                                                 order_by_con_head))
-            order_text_new = np.argsort(order_of_texts_tot)
-            return order_text_new, id_of_texts_tot
+            # xml writer will create region ids in order of
+            # - contours_only_text_parent (main text), followed by
+            # - contours_only_text_parent_h (headings),
+            # and then create regionrefs into these ordered by order_text_new
+            order_text_new = np.argsort(np.concatenate((order_by_con_main,
+                                                        order_by_con_head)))
+            return order_text_new

        try:
            results = match_boxes(False)
        except Exception as why:
-            self.logger.error(why)
+            self.logger.exception(why)
            results = match_boxes(True)

        self.logger.debug("exit do_order_of_regions")
@ -2665,45 +2655,35 @@ class Eynollah:

        return layout_org, contours_new

-    def delete_separator_around(self, spliter_y,peaks_neg,image_by_region, pixel_line, pixel_table):
+    def delete_separator_around(self, splitter_y, peaks_neg, image_by_region, label_seps, label_table):
        # format of subboxes: box=[x1, x2 , y1, y2]
        pix_del = 100
-        if len(image_by_region.shape)==3:
-            for i in range(len(spliter_y)-1):
+        for i in range(len(splitter_y)-1):
            for j in range(1,len(peaks_neg[i])-1):
-                    ys = slice(int(spliter_y[i]),
-                               int(spliter_y[i+1]))
-                    xs = slice(peaks_neg[i][j] - pix_del,
-                               peaks_neg[i][j] + pix_del)
-                    image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_line] = 0
-                    image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_line] = 0
-                    image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_line] = 0
-
-                    image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_table] = 0
-                    image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_table] = 0
-                    image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_table] = 0
+                where = np.index_exp[splitter_y[i]:
+                                     splitter_y[i+1],
+                                     peaks_neg[i][j] - pix_del:
+                                     peaks_neg[i][j] + pix_del,
+                                     :]
+                if image_by_region.ndim < 3:
+                    where = where[:2]
                else:
-            for i in range(len(spliter_y)-1):
-                for j in range(1,len(peaks_neg[i])-1):
-                    ys = slice(int(spliter_y[i]),
-                               int(spliter_y[i+1]))
-                    xs = slice(peaks_neg[i][j] - pix_del,
-                               peaks_neg[i][j] + pix_del)
-                    image_by_region[ys,xs][image_by_region[ys,xs]==pixel_line] = 0
-                    image_by_region[ys,xs][image_by_region[ys,xs]==pixel_table] = 0
+                    print("image_by_region ndim is 3!") # rs
+                image_by_region[where][image_by_region[where] == label_seps] = 0
+                image_by_region[where][image_by_region[where] == label_table] = 0
        return image_by_region

    def add_tables_heuristic_to_layout(
            self, image_regions_eraly_p, boxes,
-            slope_mean_hor, spliter_y, peaks_neg_tot, image_revised,
-            num_col_classifier, min_area, pixel_line):
+            slope_mean_hor, splitter_y, peaks_neg_tot, image_revised,
+            num_col_classifier, min_area, label_seps):

-        pixel_table =10
-        image_revised_1 = self.delete_separator_around(spliter_y, peaks_neg_tot, image_revised, pixel_line, pixel_table)
+        label_table =10
+        image_revised_1 = self.delete_separator_around(splitter_y, peaks_neg_tot, image_revised, label_seps, label_table)

        try:
-            image_revised_1[:,:30][image_revised_1[:,:30]==pixel_line] = 0
-            image_revised_1[:,-30:][image_revised_1[:,-30:]==pixel_line] = 0
+            image_revised_1[:,:30][image_revised_1[:,:30]==label_seps] = 0
+            image_revised_1[:,-30:][image_revised_1[:,-30:]==label_seps] = 0
        except:
            pass
        boxes = np.array(boxes, dtype=int) # to be on the safe side
@ -2714,7 +2694,7 @@ class Eynollah:
            _, thresh = cv2.threshold(image_col, 0, 255, 0)
            contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)

-            if indiv==pixel_table:
+            if indiv==label_table:
                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy,
                                                                     max_area=1, min_area=0.001)
            else:
@ -2730,11 +2710,11 @@ class Eynollah:
                box_xs = slice(*boxes[i][0:2])
                image_box = img_comm[box_ys, box_xs]
                try:
-                    image_box_tabels_1 = (image_box == pixel_table) * 1
+                    image_box_tabels_1 = (image_box == label_table) * 1
                    contours_tab,_=return_contours_of_image(image_box_tabels_1)
                    contours_tab=filter_contours_area_of_image_tables(image_box_tabels_1,contours_tab,_,1,0.003)
-                    image_box_tabels_1 = (image_box == pixel_line).astype(np.uint8) * 1
-                    image_box_tabels_and_m_text = ( (image_box == pixel_table) |
+                    image_box_tabels_1 = (image_box == label_seps).astype(np.uint8) * 1
+                    image_box_tabels_and_m_text = ( (image_box == label_table) |
                                                    (image_box == 1) ).astype(np.uint8) * 1

                    image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5)
@ -2796,7 +2776,7 @@ class Eynollah:
                    y_up_tabs=[]

                for ii in range(len(y_up_tabs)):
-                    image_box[y_up_tabs[ii]:y_down_tabs[ii]] = pixel_table
+                    image_box[y_up_tabs[ii]:y_down_tabs[ii]] = label_table

                image_revised_last[box_ys, box_xs] = image_box
        else:
@ -2807,14 +2787,14 @@ class Eynollah:
                image_revised_last[box_ys, box_xs] = image_box

        if num_col_classifier==1:
-            img_tables_col_1 = (image_revised_last == pixel_table).astype(np.uint8)
+            img_tables_col_1 = (image_revised_last == label_table).astype(np.uint8)
            contours_table_col1, _ = return_contours_of_image(img_tables_col_1)

            _,_ ,_ , _, y_min_tab_col1 ,y_max_tab_col1, _= find_new_features_of_contours(contours_table_col1)

            if len(y_min_tab_col1)>0:
                for ijv in range(len(y_min_tab_col1)):
-                    image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv])] = pixel_table
+                    image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv])] = label_table
        return image_revised_last

    def get_tables_from_model(self, img, num_col_classifier):
@ -2952,8 +2932,8 @@ class Eynollah:
        mask_images = (text_regions_p_1[:, :] == 2) * 1
        mask_images = mask_images.astype(np.uint8)
        mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10)
-        mask_lines = (text_regions_p_1[:, :] == 3) * 1
-        mask_lines = mask_lines.astype(np.uint8)
+        mask_seps = (text_regions_p_1[:, :] == 3) * 1
+        mask_seps = mask_seps.astype(np.uint8)
        img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
        img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)

@ -2976,10 +2956,10 @@ class Eynollah:
                                     max(self.num_col_lower or num_col_classifier,
                                         num_col_classifier))
        except Exception as why:
-            self.logger.error(why)
+            self.logger.exception(why)
            num_col = None
        #print("inside graphics 3 ", time.time() - t_in_gr)
-        return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines,
+        return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps,
                text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light)

    def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light):
@ -3029,8 +3009,8 @@ class Eynollah:
        mask_images = (text_regions_p_1[:, :] == 2) * 1
        mask_images = mask_images.astype(np.uint8)
        mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10)
-        mask_lines = (text_regions_p_1[:, :] == 3) * 1
-        mask_lines = mask_lines.astype(np.uint8)
+        mask_seps = (text_regions_p_1[:, :] == 3) * 1
+        mask_seps = mask_seps.astype(np.uint8)
        img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
        img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)

@ -3044,9 +3024,9 @@ class Eynollah:
            if not num_column_is_classified:
                num_col_classifier = num_col + 1
        except Exception as why:
-            self.logger.error(why)
+            self.logger.exception(why)
            num_col = None
-        return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines,
+        return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps,
                text_regions_p_1, cont_page, table_prediction)

    def run_enhancement(self, light_version):
@ -3101,13 +3081,13 @@ class Eynollah:
        return slope_deskew

    def run_marginals(
-            self, textline_mask_tot_ea, mask_images, mask_lines,
+            self, textline_mask_tot_ea, mask_images, mask_seps,
            num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):

        textline_mask_tot = textline_mask_tot_ea[:, :]
        textline_mask_tot[mask_images[:, :] == 1] = 0

-        text_regions_p_1[mask_lines[:, :] == 1] = 3
+        text_regions_p_1[mask_seps[:, :] == 1] = 3
        text_regions_p = text_regions_p_1[:, :]
        text_regions_p = np.array(text_regions_p)
        if num_col_classifier in (1, 2):
@ -3131,12 +3111,10 @@ class Eynollah:
        self.logger.debug('enter run_boxes_no_full_layout')
        t_0_box = time.time()
        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-            _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(
-                image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
-            text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
-            textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
-            table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1])
-            regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1
+            textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
+            text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
+            table_prediction_n = rotate_image(table_prediction, slope_deskew)
+            regions_without_separators_d = (text_regions_p_d[:, :] == 1) * 1
            if self.tables:
                regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
        regions_without_separators = (text_regions_p[:, :] == 1) * 1
@ -3146,17 +3124,17 @@ class Eynollah:
        if self.tables:
            regions_without_separators[table_prediction ==1 ] = 1
        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-            text_regions_p_1_n = None
+            text_regions_p_d = None
            textline_mask_tot_d = None
            regions_without_separators_d = None
-        pixel_lines = 3
+        label_seps = 3
        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-            _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
-                text_regions_p, num_col_classifier, self.tables, pixel_lines)
+            _, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
+                text_regions_p, num_col_classifier, self.tables, label_seps)

        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-            _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
-                text_regions_p_1_n, num_col_classifier, self.tables, pixel_lines)
+            _, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
+                text_regions_p_d, num_col_classifier, self.tables, label_seps)
        #print(time.time()-t_0_box,'time box in 2')
        self.logger.info("num_col_classifier: %s", num_col_classifier)

@ -3171,7 +3149,7 @@ class Eynollah:
        t1 = time.time()
        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
            boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
-                splitter_y_new, regions_without_separators, matrix_of_lines_ch,
+                splitter_y_new, regions_without_separators, text_regions_p, matrix_of_seps_ch,
                num_col_classifier, erosion_hurts, self.tables, self.right2left)
            boxes_d = None
            self.logger.debug("len(boxes): %s", len(boxes))
@ -3183,17 +3161,17 @@ class Eynollah:
                else:
                    text_regions_p_tables = np.copy(text_regions_p)
                    text_regions_p_tables[(table_prediction == 1)] = 10
-                    pixel_line = 3
+                    label_seps = 3
                    img_revised_tab2 = self.add_tables_heuristic_to_layout(
                        text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables,
-                        num_col_classifier , 0.000005, pixel_line)
+                        num_col_classifier , 0.000005, label_seps)
                    #print(time.time()-t_0_box,'time box in 3.2')
                    img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(
                        img_revised_tab2, table_prediction, 10, num_col_classifier)
                    #print(time.time()-t_0_box,'time box in 3.3')
        else:
            boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
-                splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d,
+                splitter_y_new_d, regions_without_separators_d, text_regions_p_d, matrix_of_seps_ch_d,
                num_col_classifier, erosion_hurts, self.tables, self.right2left)
            boxes = None
            self.logger.debug("len(boxes): %s", len(boxes_d))
@ -3202,15 +3180,15 @@ class Eynollah:
                if self.light_version:
                    pass
                else:
-                    text_regions_p_tables = np.copy(text_regions_p_1_n)
+                    text_regions_p_tables = np.copy(text_regions_p_d)
                    text_regions_p_tables = np.round(text_regions_p_tables)
                    text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10

-                    pixel_line = 3
+                    label_seps = 3
                    img_revised_tab2 = self.add_tables_heuristic_to_layout(
                        text_regions_p_tables, boxes_d, 0, splitter_y_new_d,
                        peaks_neg_tot_tables_d, text_regions_p_tables,
-                        num_col_classifier, 0.000005, pixel_line)
+                        num_col_classifier, 0.000005, label_seps)
                    img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(
                        img_revised_tab2, table_prediction_n, 10, num_col_classifier)

@ -3245,22 +3223,22 @@ class Eynollah:
        else:
            polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2)

-        pixel_img = 4
+        label_marginalia = 4
        min_area_mar = 0.00001
        if self.light_version:
-            marginal_mask = (text_regions_p[:,:]==pixel_img)*1
+            marginal_mask = (text_regions_p[:,:]==label_marginalia)*1
            marginal_mask = marginal_mask.astype('uint8')
            marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)

            polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
        else:
-            polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)
+            polygons_of_marginals = return_contours_of_interested_region(text_regions_p, label_marginalia, min_area_mar)

-        pixel_img = 10
-        contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)
+        label_tables = 10
+        contours_tables = return_contours_of_interested_region(text_regions_p, label_tables, min_area_mar)
        #print(time.time()-t_0_box,'time box in 5')
        self.logger.debug('exit run_boxes_no_full_layout')
-        return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d,
+        return (polygons_of_images, img_revised_tab, text_regions_p_d, textline_mask_tot_d,
                regions_without_separators_d, boxes, boxes_d,
                polygons_of_marginals, contours_tables)

@ -3276,24 +3254,13 @@ class Eynollah:
                text_regions_p[:,:][table_prediction[:,:]==1] = 10
                img_revised_tab = text_regions_p[:,:]
                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
-                        rotation_not_90_func(image_page, textline_mask_tot, text_regions_p,
-                                             table_prediction, slope_deskew)
-
-                    text_regions_p_1_n = resize_image(text_regions_p_1_n,
-                                                      text_regions_p.shape[0],
-                                                      text_regions_p.shape[1])
-                    textline_mask_tot_d = resize_image(textline_mask_tot_d,
-                                                       text_regions_p.shape[0],
-                                                       text_regions_p.shape[1])
-                    table_prediction_n = resize_image(table_prediction_n,
-                                                      text_regions_p.shape[0],
-                                                      text_regions_p.shape[1])
-
-                    regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
+                    textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
+                    text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
+                    table_prediction_n = rotate_image(table_prediction, slope_deskew)
+                    regions_without_separators_d = (text_regions_p_d[:,:] == 1)*1
                    regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
                else:
-                    text_regions_p_1_n = None
+                    text_regions_p_d = None
                    textline_mask_tot_d = None
                    regions_without_separators_d = None
                # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1
@ -3303,24 +3270,13 @@ class Eynollah:

            else:
                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
-                        rotation_not_90_func(image_page, textline_mask_tot, text_regions_p,
-                                             table_prediction, slope_deskew)
-
-                    text_regions_p_1_n = resize_image(text_regions_p_1_n,
-                                                      text_regions_p.shape[0],
-                                                      text_regions_p.shape[1])
-                    textline_mask_tot_d = resize_image(textline_mask_tot_d,
-                                                       text_regions_p.shape[0],
-                                                       text_regions_p.shape[1])
-                    table_prediction_n = resize_image(table_prediction_n,
-                                                      text_regions_p.shape[0],
-                                                      text_regions_p.shape[1])
-
-                    regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
+                    textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
+                    text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
+                    table_prediction_n = rotate_image(table_prediction, slope_deskew)
+                    regions_without_separators_d = (text_regions_p_d[:,:] == 1)*1
                    regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
                else:
-                    text_regions_p_1_n = None
+                    text_regions_p_d = None
                    textline_mask_tot_d = None
                    regions_without_separators_d = None

@ -3329,14 +3285,14 @@ class Eynollah:
                regions_without_separators = (text_regions_p[:,:] == 1)*1
                regions_without_separators[table_prediction == 1] = 1

-                pixel_lines=3
+                label_seps=3
                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                    num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
-                        text_regions_p, num_col_classifier, self.tables, pixel_lines)
+                    num_col, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
+                        text_regions_p, num_col_classifier, self.tables, label_seps)

                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
-                        text_regions_p_1_n, num_col_classifier, self.tables, pixel_lines)
+                    num_col_d, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
+                        text_regions_p_d, num_col_classifier, self.tables, label_seps)

                if num_col_classifier>=3:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
@ -3351,30 +3307,30 @@ class Eynollah:

                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
-                        splitter_y_new, regions_without_separators, matrix_of_lines_ch,
+                        splitter_y_new, regions_without_separators, text_regions_p, matrix_of_seps_ch,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left)
                    text_regions_p_tables = np.copy(text_regions_p)
                    text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10
-                    pixel_line = 3
+                    label_seps = 3
                    img_revised_tab2 = self.add_tables_heuristic_to_layout(
                        text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables,
-                        num_col_classifier , 0.000005, pixel_line)
+                        num_col_classifier , 0.000005, label_seps)

                    img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(
                        img_revised_tab2, table_prediction, 10, num_col_classifier)
                else:
                    boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
-                        splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d,
+                        splitter_y_new_d, regions_without_separators_d, text_regions_p_d, matrix_of_seps_ch_d,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left)
-                    text_regions_p_tables = np.copy(text_regions_p_1_n)
+                    text_regions_p_tables = np.copy(text_regions_p_d)
                    text_regions_p_tables = np.round(text_regions_p_tables)
                    text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10

-                    pixel_line = 3
+                    label_seps = 3
                    img_revised_tab2 = self.add_tables_heuristic_to_layout(
                        text_regions_p_tables, boxes_d, 0, splitter_y_new_d,
                        peaks_neg_tot_tables_d, text_regions_p_tables,
-                        num_col_classifier, 0.000005, pixel_line)
+                        num_col_classifier, 0.000005, label_seps)

                    img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(
                        img_revised_tab2, table_prediction_n, 10, num_col_classifier)
@ -3399,20 +3355,20 @@ class Eynollah:
                text_regions_p[img_revised_tab == 10] = 10
                #img_revised_tab[img_revised_tab2 == 10] = 10

-        pixel_img = 4
+        label_marginalia = 4
        min_area_mar = 0.00001

        if self.light_version:
-            marginal_mask = (text_regions_p[:,:]==pixel_img)*1
+            marginal_mask = (text_regions_p[:,:]==label_marginalia)*1
            marginal_mask = marginal_mask.astype('uint8')
            marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)

            polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
        else:
-            polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)
+            polygons_of_marginals = return_contours_of_interested_region(text_regions_p, label_marginalia, min_area_mar)

-        pixel_img = 10
-        contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)
+        label_tables = 10
+        contours_tables = return_contours_of_interested_region(text_regions_p, label_tables, min_area_mar)

        # set first model with second model
        text_regions_p[:, :][text_regions_p[:, :] == 2] = 5
@ -3465,16 +3421,13 @@ class Eynollah:
        #plt.show()
        ####if not self.tables:
        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-            _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(
-                image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew)
-
-            text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
-            textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
-            regions_fully_n = resize_image(regions_fully_n, text_regions_p.shape[0], text_regions_p.shape[1])
+            textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
+            text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
+            regions_fully_n = rotate_image(regions_fully, slope_deskew)
            if not self.tables:
-                regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1
+                regions_without_separators_d = (text_regions_p_d[:, :] == 1) * 1
        else:
-            text_regions_p_1_n = None
+            text_regions_p_d = None
            textline_mask_tot_d = None
            regions_without_separators_d = None
        if not self.tables:
@ -3484,7 +3437,7 @@ class Eynollah:

        self.logger.debug('exit run_boxes_full_layout')
        #print("full inside 3", time.time()- t_full0)
-        return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d,
+        return (polygons_of_images, img_revised_tab, text_regions_p_d, textline_mask_tot_d,
                regions_without_separators_d, regions_fully, regions_without_separators,
                polygons_of_marginals, contours_tables)

@ -3632,7 +3585,7 @@ class Eynollah:
                co_text_all = contours_only_text_parent

        if not len(co_text_all):
-            return [], []
+            return []

        labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool)
        co_text_all = [(i/6).astype(int) for i in co_text_all]
@ -3715,11 +3668,9 @@ class Eynollah:
                else:
                    org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]])
            
-            region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
-            return org_contours_indexes, region_ids
+            return org_contours_indexes
        else:
-            region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
-            return ordered, region_ids
+            return ordered

    def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot):
        width = np.shape(textline_image)[1]
@ -4213,7 +4164,7 @@ class Eynollah:
                image_page, page_coord, cont_page = \
                self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
            pcgts = self.writer.build_pagexml_no_full_layout(
-                [], page_coord, [], [], [], [],
+                [], page_coord, [], [], [],
                polygons_of_images, [], [], [], [], [], [], [], [], [],
                cont_page, [], [])
            if self.plotter:
@ -4254,7 +4205,6 @@ class Eynollah:
            
            order_text_new = [0]
            slopes =[0]
-            id_of_texts_tot =['region_0001']
            conf_contours_textregions =[0]
            
            if self.ocr and not self.tr:
@ -4266,7 +4216,7 @@ class Eynollah:
                ocr_all_textlines = None
            
            pcgts = self.writer.build_pagexml_no_full_layout(
-                cont_page, page_coord, order_text_new, id_of_texts_tot,
+                cont_page, page_coord, order_text_new,
                all_found_textline_polygons, page_coord, [],
                [], [], [], [], [], [],
                slopes, [], [],
@ -4301,7 +4251,7 @@ class Eynollah:
                slope_deskew = self.run_deskew(textline_mask_tot_ea)
            #print("text region early -2,5 in %.1fs", time.time() - t0)
            #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
-            num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
+            num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps, \
                text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \
                    self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea,
                                                        num_col_classifier, num_column_is_classified,
@ -4318,7 +4268,7 @@ class Eynollah:
            confidence_matrix = np.zeros((text_regions_p_1.shape[:2]))

            t1 = time.time()
-            num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
+            num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps, \
                text_regions_p_1, cont_page, table_prediction = \
                    self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified,
                                                  erosion_hurts)
@ -4332,7 +4282,7 @@ class Eynollah:
            self.logger.info("No columns detected - generating empty PAGE-XML")
    
            pcgts = self.writer.build_pagexml_no_full_layout(
-                [], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [],
+                [], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [],
                cont_page, [], [])
            return pcgts

@ -4356,12 +4306,12 @@ class Eynollah:
            image_page = resize_image(image_page,img_h_new, img_w_new )
            textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
            mask_images = resize_image(mask_images,img_h_new, img_w_new )
-            mask_lines = resize_image(mask_lines,img_h_new, img_w_new )
+            mask_seps = resize_image(mask_seps, img_h_new, img_w_new)
            text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new )
            table_prediction = resize_image(table_prediction,img_h_new, img_w_new )

        textline_mask_tot, text_regions_p = \
-            self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines,
+            self.run_marginals(textline_mask_tot_ea, mask_images, mask_seps,
                               num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
        if self.plotter:
            self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
@ -4398,14 +4348,14 @@ class Eynollah:
        ## birdan sora chock chakir
        t1 = time.time()
        if not self.full_layout:
-            polygons_of_images, img_revised_tab, text_regions_p_1_n, \
+            polygons_of_images, img_revised_tab, text_regions_p_d, \
                textline_mask_tot_d, regions_without_separators_d, \
                boxes, boxes_d, polygons_of_marginals, contours_tables = \
                self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                              num_col_classifier, table_prediction, erosion_hurts)
            ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
        else:
-            polygons_of_images, img_revised_tab, text_regions_p_1_n, \
+            polygons_of_images, img_revised_tab, text_regions_p_d, \
                textline_mask_tot_d, regions_without_separators_d, \
                regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
                self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
@ -4419,7 +4369,7 @@ class Eynollah:

        text_only = (img_revised_tab[:, :] == 1) * 1
        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-            text_only_d = (text_regions_p_1_n[:, :] == 1) * 1
+            text_only_d = ((text_regions_p_d[:, :] == 1)) * 1

        #print("text region early 2 in %.1fs", time.time() - t0)
        ###min_con_area = 0.000005
@ -4493,7 +4443,7 @@ class Eynollah:
                        dists[i] = np.linalg.norm(centers[:, i:i + 1] - centers_d, axis=0)
                    corresp = np.zeros(dists.shape, dtype=bool)
                    # keep searching next-closest until at least one correspondence on each side
-                    while not np.all(corresp.sum(axis=1)) and not np.all(corresp.sum(axis=0)):
+                    while not np.all(corresp.sum(axis=1)) or not np.all(corresp.sum(axis=0)):
                        idx = np.nanargmin(dists)
                        i, j = np.unravel_index(idx, dists.shape)
                        dists[i, j] = np.nan
@ -4504,7 +4454,7 @@ class Eynollah:
                    # img1 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
                    # for i in range(len(contours_only_text_parent)):
                    #     cv2.fillPoly(img1, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1)
-                    # plt.subplot(2, 2, 1, title="direct corresp contours")
+                    # plt.subplot(1, 4, 1, title="direct corresp contours")
                    # plt.imshow(img1)
                    # img2 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
                    # join deskewed regions mapping to single original ones
@ -4517,13 +4467,13 @@ class Eynollah:
                            contour_d = polygon2contour(join_polygons(polygons_d))
                            contours_only_text_parent_d_ordered[i] = contour_d
                    #         cv2.fillPoly(img2, pts=[contour_d], color=i + 1)
-                    # plt.subplot(2, 2, 3, title="joined contours")
+                    # plt.subplot(1, 4, 2, title="joined contours")
                    # plt.imshow(img2)
                    # img3 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
                    # split deskewed regions mapping to multiple original ones
                    def deskew(polygon):
                        polygon = shapely.affinity.rotate(polygon, -slope_deskew, origin=center)
-                        polygon = shapely.affinity.translate(polygon, *offset.squeeze())
+                        #polygon = shapely.affinity.translate(polygon, *offset.squeeze())
                        return polygon
                    for j in range(len(contours_only_text_parent_d)):
                        if np.count_nonzero(corresp[:, j]) > 1:
@ -4541,21 +4491,45 @@ class Eynollah:
                                          if polygon_d]
                            contours_only_text_parent_d_ordered[indices] = contours_d
                    #         cv2.fillPoly(img3, pts=contours_d, color=j + 1)
-                    # plt.subplot(2, 2, 4, title="split contours")
+                    # plt.subplot(1, 4, 3, title="split contours")
                    # plt.imshow(img3)
                    # img4 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
                    # for i in range(len(contours_only_text_parent)):
                    #     cv2.fillPoly(img4, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1)
-                    # plt.subplot(2, 2, 2, title="result contours")
+                    # plt.subplot(1, 4, 4, title="result contours")
                    # plt.imshow(img4)
                    # plt.show()
+                # from matplotlib import patches as ptchs
+                # plt.subplot(1, 2, 1, title="undeskewed")
+                # plt.imshow(text_only)
+                # centers = np.stack(find_center_of_contours(contours_only_text_parent)) # [2, N]
+                # for i in range(len(contours_only_text_parent)):
+                #     cnt = contours_only_text_parent[i]
+                #     ctr = centers[:, i]
+                #     plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='blue'))
+                #     plt.gca().scatter(ctr[0], ctr[1], 20, c='blue', marker='x')
+                #     plt.gca().text(ctr[0], ctr[1], str(i), c='blue')
+                # plt.subplot(1, 2, 2, title="deskewed")
+                # plt.imshow(text_only_d)
+                # centers_d = np.stack(find_center_of_contours(contours_only_text_parent_d_ordered)) # [2, N]
+                # for i in range(len(contours_only_text_parent)):
+                #     cnt = contours_only_text_parent[i]
+                #     cnt = polygon2contour(deskew(contour2polygon(cnt)))
+                #     plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='blue'))
+                # for i in range(len(contours_only_text_parent_d_ordered)):
+                #     cnt = contours_only_text_parent_d_ordered[i]
+                #     ctr = centers_d[:, i]
+                #     plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='red'))
+                #     plt.gca().scatter(ctr[0], ctr[1], 20, c='red', marker='x')
+                #     plt.gca().text(ctr[0], ctr[1], str(i), c='red')
+                # plt.show()

        if not len(contours_only_text_parent):
            # stop early
            empty_marginals = [[]] * len(polygons_of_marginals)
            if self.full_layout:
                pcgts = self.writer.build_pagexml_full_layout(
-                    [], [], page_coord, [], [], [], [], [], [],
+                    [], [], page_coord, [], [], [], [], [],
                    polygons_of_images, contours_tables, [],
                    polygons_of_marginals, polygons_of_marginals,
                    empty_marginals, empty_marginals,
@ -4564,7 +4538,7 @@ class Eynollah:
                    cont_page, polygons_seplines)
            else:
                pcgts = self.writer.build_pagexml_no_full_layout(
-                    [], page_coord, [], [], [], [],
+                    [], page_coord, [], [], [],
                    polygons_of_images,
                    polygons_of_marginals, polygons_of_marginals,
                    empty_marginals, empty_marginals,
@ -4695,18 +4669,18 @@ class Eynollah:
                label_seps = 6
                if not self.headers_off:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                        num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
+                        num_col, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                            text_regions_p, num_col_classifier, self.tables,  label_seps, contours_only_text_parent_h)
                    else:
-                        _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
-                            text_regions_p_1_n, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered)
+                        _, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
+                            text_regions_p_d, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered)
                elif self.headers_off:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                        num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
+                        num_col, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                            text_regions_p, num_col_classifier, self.tables,  label_seps)
                    else:
-                        _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
-                            text_regions_p_1_n, num_col_classifier, self.tables, label_seps)
+                        _, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
+                            text_regions_p_d, num_col_classifier, self.tables, label_seps)

                if num_col_classifier >= 3:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
@ -4717,13 +4691,13 @@ class Eynollah:
                        regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)

                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                    boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
-                        splitter_y_new, regions_without_separators, matrix_of_lines_ch,
+                    boxes, _ = return_boxes_of_images_by_order_of_reading_new(
+                        splitter_y_new, regions_without_separators, text_regions_p, matrix_of_seps_ch,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left,
                        logger=self.logger)
                else:
-                    boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
-                        splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d,
+                    boxes_d, _ = return_boxes_of_images_by_order_of_reading_new(
+                        splitter_y_new_d, regions_without_separators_d, text_regions_p_d, matrix_of_seps_ch_d,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left,
                        logger=self.logger)
        else:
@ -4744,14 +4718,14 @@ class Eynollah:
            self.logger.info("Headers ignored in reading order")

        if self.reading_order_machine_based:
-            order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
+            order_text_new = self.do_order_of_regions_with_model(
                contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
        else:
            if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                order_text_new, id_of_texts_tot = self.do_order_of_regions(
+                order_text_new = self.do_order_of_regions(
                    contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
            else:
-                order_text_new, id_of_texts_tot = self.do_order_of_regions(
+                order_text_new = self.do_order_of_regions(
                    contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered,
                    boxes_d, textline_mask_tot_d)
        self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
@ -4848,7 +4822,7 @@ class Eynollah:

        if self.full_layout:
            pcgts = self.writer.build_pagexml_full_layout(
-                contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
+                contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new,
                all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                polygons_of_images, contours_tables, polygons_of_drop_capitals,
                polygons_of_marginals_left, polygons_of_marginals_right,
@ -4861,7 +4835,7 @@ class Eynollah:
                conf_contours_textregions, conf_contours_textregions_h)
        else:
            pcgts = self.writer.build_pagexml_no_full_layout(
-                contours_only_text_parent, page_coord, order_text_new, id_of_texts_tot,
+                contours_only_text_parent, page_coord, order_text_new,
                all_found_textline_polygons, all_box_coord, polygons_of_images,
                polygons_of_marginals_left, polygons_of_marginals_right,
                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
--- a/src/eynollah/ocrd_cli_binarization.py
+++ b/src/eynollah/ocrd_cli_binarization.py
@ -70,7 +70,7 @@ class SbbBinarizeProcessor(Processor):

        if oplevel == 'page':
            self.logger.info("Binarizing on 'page' level in page '%s'", page_id)
-            page_image_bin = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True))
+            page_image_bin = cv2pil(self.binarizer.run_single(image=pil2cv(page_image), use_patches=True))
            # update PAGE (reference the image file):
            page_image_ref = AlternativeImageType(comments=page_xywh['features'] + ',binarized,clipped')
            page.add_AlternativeImage(page_image_ref)
@ -83,7 +83,7 @@ class SbbBinarizeProcessor(Processor):
            for region in regions:
                region_image, region_xywh = self.workspace.image_from_segment(
                    region, page_image, page_xywh, feature_filter='binarized')
-                region_image_bin = cv2pil(self.binarizer.run(image=pil2cv(region_image), use_patches=True))
+                region_image_bin = cv2pil(self.binarizer.run_single(image=pil2cv(region_image), use_patches=True))
                # update PAGE (reference the image file):
                region_image_ref = AlternativeImageType(comments=region_xywh['features'] + ',binarized')
                region.add_AlternativeImage(region_image_ref)
@ -95,7 +95,7 @@ class SbbBinarizeProcessor(Processor):
                self.logger.warning("Page '%s' contains no text lines", page_id)
            for line in lines:
                line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized')
-                line_image_bin = cv2pil(self.binarizer.run(image=pil2cv(line_image), use_patches=True))
+                line_image_bin = cv2pil(self.binarizer.run_single(image=pil2cv(line_image), use_patches=True))
                # update PAGE (reference the image file):
                line_image_ref = AlternativeImageType(comments=line_xywh['features'] + ',binarized')
-                line.add_AlternativeImage(region_image_ref)
+                line.add_AlternativeImage(line_image_ref)
--- a/src/eynollah/sbb_binarize.py
+++ b/src/eynollah/sbb_binarize.py
@ -25,7 +25,7 @@ class SbbBinarizer:

    def __init__(self, model_dir, logger=None):
        self.model_dir = model_dir
-        self.log = logger if logger else logging.getLogger('SbbBinarizer')
+        self.logger = logger if logger else logging.getLogger('SbbBinarizer')

        self.start_new_session()

@ -315,47 +315,30 @@ class SbbBinarizer:
            prediction_true = prediction_true.astype(np.uint8)
        return prediction_true[:,:,0]

-    def run(self, image=None, image_path=None, output=None, use_patches=False, dir_in=None):
-        # print(dir_in,'dir_in')
-        if not dir_in:
-            if (image is not None and image_path is not None) or \
-                (image is None and image_path is None):
-                raise ValueError("Must pass either a opencv2 image or an image_path")
-            if image_path is not None:
-                image = cv2.imread(image_path)
-            img_last = 0
-            for n, (model, model_file) in enumerate(zip(self.models, self.model_files)):
-                self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files)))
-
-                res = self.predict(model, image, use_patches)
-
-                img_fin = np.zeros((res.shape[0], res.shape[1], 3))
-                res[:, :][res[:, :] == 0] = 2
-                res = res - 1
-                res = res * 255
-                img_fin[:, :, 0] = res
-                img_fin[:, :, 1] = res
-                img_fin[:, :, 2] = res
-
-                img_fin = img_fin.astype(np.uint8)
-                img_fin = (res[:, :] == 0) * 255
-                img_last = img_last + img_fin
-
-            kernel = np.ones((5, 5), np.uint8)
-            img_last[:, :][img_last[:, :] > 0] = 255
-            img_last = (img_last[:, :] == 0) * 255
-            if output:
-                cv2.imwrite(output, img_last)
-            return img_last
+    def run(self, image_path=None, output=None, dir_in=None, use_patches=False, overwrite=False):
+        """Binarize ``image_path`` to ``output``, or every image in ``dir_in`` to a PNG under ``output``."""
+        if dir_in:
+            ls_imgs = [(os.path.join(dir_in, image_filename),
+                        os.path.join(output, os.path.splitext(image_filename)[0] + '.png'))
+                       for image_filename in filter(is_image_filename,
+                                                    os.listdir(dir_in))]
        else:
-            ls_imgs = list(filter(is_image_filename, os.listdir(dir_in)))
-            for image_name in ls_imgs:
-                image_stem = image_name.split('.')[0]
-                print(image_name,'image_name')
-                image = cv2.imread(os.path.join(dir_in,image_name) )
+            ls_imgs = [(image_path, output)]
+        for input_path, output_path in ls_imgs:
+            print(input_path, 'image_name')
+            if os.path.exists(output_path):
+                if not overwrite:
+                    self.logger.warning("will skip input for existing output file '%s'", output_path)
+                    continue  # actually skip, as the warning announces, instead of re-binarizing
+                self.logger.warning("will overwrite existing output file '%s'", output_path)
+            image = cv2.imread(input_path)
+            result = self.run_single(image, use_patches)
+            cv2.imwrite(output_path, result)
+
+    def run_single(self, image: np.ndarray, use_patches=False):
        img_last = 0
        for n, (model, model_file) in enumerate(zip(self.models, self.model_files)):
-                    self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files)))
+            self.logger.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files)))

            res = self.predict(model, image, use_patches)

@ -374,5 +357,4 @@ class SbbBinarizer:
        kernel = np.ones((5, 5), np.uint8)
        img_last[:, :][img_last[:, :] > 0] = 255
        img_last = (img_last[:, :] == 0) * 255
-                
-                cv2.imwrite(os.path.join(output, image_stem + '.png'), img_last)
+        return img_last
--- a/src/eynollah/utils/init.py
+++ b/src/eynollah/utils/init.py
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@ -14,21 +14,16 @@ from shapely.ops import unary_union, nearest_points
 from .rotate import rotate_image, rotation_image_new

 def contours_in_same_horizon(cy_main_hor):
-    X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
-    X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
-
-    X1[0::1, :] = cy_main_hor[:]
-    X2 = X1.T
-
-    X_dif = np.abs(X2 - X1)
-    args_help = np.array(range(len(cy_main_hor)))
-    all_args = []
-    for i in range(len(cy_main_hor)):
-        list_h = list(args_help[X_dif[i, :] <= 20])
-        list_h.append(i)
-        if len(list_h) > 1:
-            all_args.append(list(set(list_h)))
-    return np.unique(np.array(all_args, dtype=object))
+    """
+    Takes an array of y coords, sorts them, and returns an (N, 2) array of
+    index pairs (into the input) for neighbours in sorted order whose
+    y coords differ by at most 20.
+    """
+    sort = np.argsort(cy_main_hor)
+    # threshold the differences themselves; np.diff of a boolean mask would only XOR neighbours
+    same = np.diff(np.asarray(cy_main_hor)[sort]) <= 20
+    same = np.flatnonzero(same)
+    return np.stack((sort[:-1][same], sort[1:][same])).T

 def find_contours_mean_y_diff(contours_main):
    M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@ -89,7 +89,7 @@ class EynollahXmlWriter:

    def build_pagexml_no_full_layout(
            self, found_polygons_text_region,
-            page_coord, order_of_texts, id_of_texts,
+            page_coord, order_of_texts,
            all_found_textline_polygons,
            all_box_coord,
            found_polygons_text_region_img,
@ -102,7 +102,7 @@ class EynollahXmlWriter:
            **kwargs):
        return self.build_pagexml_full_layout(
            found_polygons_text_region, [],
-            page_coord, order_of_texts, id_of_texts,
+            page_coord, order_of_texts,
            all_found_textline_polygons, [],
            all_box_coord, [],
            found_polygons_text_region_img, found_polygons_tables, [],
@ -116,7 +116,7 @@ class EynollahXmlWriter:
    def build_pagexml_full_layout(
            self,
            found_polygons_text_region, found_polygons_text_region_h,
-            page_coord, order_of_texts, id_of_texts,
+            page_coord, order_of_texts,
            all_found_textline_polygons, all_found_textline_polygons_h,
            all_box_coord, all_box_coord_h,
            found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals,