From 09ece86f0dcb860eef978319b2350ccf7df13c2c Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 19 Aug 2025 11:58:45 +0200
Subject: [PATCH 01/41] dilate_textregions_contours: simplify (via shapely's
 Polygon.buffer()), ensure validity

---
 src/eynollah/eynollah.py      | 212 ++--------------------------------
 src/eynollah/utils/contour.py |  30 ++++-
 2 files changed, 36 insertions(+), 206 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index d47016b..55789ae 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -27,6 +27,7 @@ from loky import ProcessPoolExecutor
 import xml.etree.ElementTree as ET
 import cv2
 import numpy as np
+from shapely.geometry import Polygon
 from scipy.signal import find_peaks
 from scipy.ndimage import gaussian_filter1d
 from numba import cuda
@@ -68,6 +69,7 @@ from .utils.contour import (
     get_text_region_boxes_by_given_contours,
     get_textregion_contours_in_org_image,
     get_textregion_contours_in_org_image_light,
+    make_valid,
     return_contours_of_image,
     return_contours_of_interested_region,
     return_contours_of_interested_region_by_min_size,
@@ -3670,211 +3672,15 @@ class Eynollah:
         return x_differential_new
 
     def dilate_textregions_contours_textline_version(self, all_found_textline_polygons):
-        #print(all_found_textline_polygons)
-        for j in range(len(all_found_textline_polygons)):
-            for ij in range(len(all_found_textline_polygons[j])):
-                con_ind = all_found_textline_polygons[j][ij]
-                area = cv2.contourArea(con_ind)
-                con_ind = con_ind.astype(float)
-
-                x_differential = np.diff( con_ind[:,0,0])
-                y_differential = np.diff( con_ind[:,0,1])
-
-                x_differential = gaussian_filter1d(x_differential, 0.1)
-                y_differential = gaussian_filter1d(y_differential, 0.1)
-
-                x_min = float(np.min( con_ind[:,0,0] ))
-                y_min = float(np.min( con_ind[:,0,1] ))
-
-                x_max = float(np.max( con_ind[:,0,0] ))
-                y_max = float(np.max( con_ind[:,0,1] ))
-
-                x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential]
-                y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential]
-
-                abs_diff=abs(abs(x_differential)- abs(y_differential) )
-
-                inc_x = np.zeros(len(x_differential)+1)
-                inc_y = np.zeros(len(x_differential)+1)
-
-                if (y_max-y_min) <= (x_max-x_min):
-                    dilation_m1 = round(area / (x_max-x_min) * 0.12)
-                else:
-                    dilation_m1 = round(area / (y_max-y_min) * 0.12)
-
-                if dilation_m1>8:
-                    dilation_m1 = 8
-                if dilation_m1<6:
-                    dilation_m1 = 6
-                #print(dilation_m1, 'dilation_m1')
-                dilation_m1 = 6
-                dilation_m2 = int(dilation_m1/2.) +1 
-
-                for i in range(len(x_differential)):
-                    if abs_diff[i]==0:
-                        inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i])
-                        inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i])
-                    elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0:
-                        inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i])
-                    elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0:
-                        inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i])
-
-                    elif abs_diff[i]!=0 and abs_diff[i]>=3:
-                        if abs(x_differential[i])>abs(y_differential[i]):
-                            inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i])
-                        else:
-                            inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i])
-                    else:
-                        inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i])
-                        inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i])
-
-                inc_x[0] = inc_x[-1]
-                inc_y[0] = inc_y[-1]
-
-                con_scaled = con_ind*1
-
-                con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:]
-                con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:]
-
-                con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0
-                con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0
-
-                area_scaled = cv2.contourArea(con_scaled.astype(np.int32))
-
-                con_ind = con_ind.astype(np.int32)
-
-                results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False)
-                           for ind in range(len(con_scaled[:,0, 1])) ]
-                results = np.array(results)
-                #print(results,'results')
-                results[results==0] = 1
-
-                diff_result = np.diff(results)
-
-                indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2]
-                indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2]
-
-                if results[0]==1:
-                    con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1]
-                    con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0]
-                    #indices_2 = indices_2[1:]
-                    indices_m2 = indices_m2[1:]
-
-                if len(indices_2)>len(indices_m2):
-                    con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1]
-                    con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0]
-                    indices_2 = indices_2[:-1]
-
-                for ii in range(len(indices_2)):
-                    con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1]
-                    con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0]
-
-                all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1]
-                all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0]
-        return all_found_textline_polygons
+        return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords,
+                          dtype=int)[:, np.newaxis]
+                 for poly in region]
+                for region in all_found_textline_polygons]
 
     def dilate_textregions_contours(self, all_found_textline_polygons):
-        #print(all_found_textline_polygons)
-        for j in range(len(all_found_textline_polygons)):
-            con_ind = all_found_textline_polygons[j]
-            #print(len(con_ind[:,0,0]),'con_ind[:,0,0]')
-            area = cv2.contourArea(con_ind)
-            con_ind = con_ind.astype(float)
-
-            x_differential = np.diff( con_ind[:,0,0])
-            y_differential = np.diff( con_ind[:,0,1])
-
-            x_differential = gaussian_filter1d(x_differential, 0.1)
-            y_differential = gaussian_filter1d(y_differential, 0.1)
-
-            x_min = float(np.min( con_ind[:,0,0] ))
-            y_min = float(np.min( con_ind[:,0,1] ))
-
-            x_max = float(np.max( con_ind[:,0,0] ))
-            y_max = float(np.max( con_ind[:,0,1] ))
-
-            x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential]
-            y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential]
-
-            abs_diff=abs(abs(x_differential)- abs(y_differential) )
-
-            inc_x = np.zeros(len(x_differential)+1)
-            inc_y = np.zeros(len(x_differential)+1)
-
-            if (y_max-y_min) <= (x_max-x_min):
-                dilation_m1 = round(area / (x_max-x_min) * 0.12)
-            else:
-                dilation_m1 = round(area / (y_max-y_min) * 0.12)
-
-            if dilation_m1>8:
-                dilation_m1 = 8
-            if dilation_m1<6:
-                dilation_m1 = 6
-            #print(dilation_m1, 'dilation_m1')
-            dilation_m1 = 6
-            dilation_m2 = int(dilation_m1/2.) +1 
-
-            for i in range(len(x_differential)):
-                if abs_diff[i]==0:
-                    inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i])
-                    inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i])
-                elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0:
-                    inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i])
-                elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0:
-                    inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i])
-
-                elif abs_diff[i]!=0 and abs_diff[i]>=3:
-                    if abs(x_differential[i])>abs(y_differential[i]):
-                        inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i])
-                    else:
-                        inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i])
-                else:
-                    inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i])
-                    inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i])
-
-            inc_x[0] = inc_x[-1]
-            inc_y[0] = inc_y[-1]
-
-            con_scaled = con_ind*1
-
-            con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:]
-            con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:]
-
-            con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0
-            con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0
-
-            area_scaled = cv2.contourArea(con_scaled.astype(np.int32))
-
-            con_ind = con_ind.astype(np.int32)
-
-            results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False)
-                       for ind in range(len(con_scaled[:,0, 1])) ]
-            results = np.array(results)
-            #print(results,'results')
-            results[results==0] = 1
-
-            diff_result = np.diff(results)
-            indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2]
-            indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2]
-
-            if results[0]==1:
-                con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1]
-                con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0]
-                #indices_2 = indices_2[1:]
-                indices_m2 = indices_m2[1:]
-
-            if len(indices_2)>len(indices_m2):
-                con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1]
-                con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0]
-                indices_2 = indices_2[:-1]
-
-            for ii in range(len(indices_2)):
-                con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1]
-                con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0]
-
-            all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1]
-            all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0]
-        return all_found_textline_polygons
+        return [np.array(make_valid(Polygon(poly[:, 0])).buffer(5).exterior.coords,
+                         dtype=int)[:, np.newaxis]
+                for poly in all_found_textline_polygons]
 
     def dilate_textline_contours(self, all_found_textline_polygons):
         for j in range(len(all_found_textline_polygons)):
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 0e84153..3d7e5c8 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -1,7 +1,7 @@
 from functools import partial
 import cv2
 import numpy as np
-from shapely import geometry
+from shapely.geometry import Polygon
 
 from .rotate import rotate_image, rotation_image_new
 
@@ -43,7 +43,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area
         if len(c) < 3:  # A polygon cannot have less than 3 points
             continue
 
-        polygon = geometry.Polygon([point[0] for point in c])
+        polygon = Polygon([point[0] for point in c])
         area = polygon.area
         if (area >= min_area * np.prod(image.shape[:2]) and
             area <= max_area * np.prod(image.shape[:2]) and
@@ -58,7 +58,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
         if len(c) < 3:  # A polygon cannot have less than 3 points
             continue
 
-        polygon = geometry.Polygon([point[0] for point in c])
+        polygon = Polygon([point[0] for point in c])
         # area = cv2.contourArea(c)
         area = polygon.area
         ##print(np.prod(thresh.shape[:2]))
@@ -332,3 +332,27 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
 
     return img_ret[:, :, 0]
 
+def make_valid(polygon: Polygon) -> Polygon:
+    """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
+    points = list(polygon.exterior.coords)
+    # try by re-arranging points
+    for split in range(1, len(points)):
+        if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
+            break
+        # simplification may not be possible (at all) due to ordering
+        # in that case, try another starting point
+        polygon = Polygon(points[-split:]+points[:-split])
+    # try by simplification
+    for tolerance in range(int(polygon.area + 1.5)):
+        if polygon.is_valid:
+            break
+        # simplification may require a larger tolerance
+        polygon = polygon.simplify(tolerance + 1)
+    # try by enlarging
+    for tolerance in range(1, int(polygon.area + 2.5)):
+        if polygon.is_valid:
+            break
+        # enlargement may require a larger tolerance
+        polygon = polygon.buffer(tolerance)
+    assert polygon.is_valid, polygon.wkt
+    return polygon

From b48c41e68ff59d8cff97a59a534fee20d2d32408 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 19 Aug 2025 20:09:09 +0200
Subject: [PATCH 02/41] return_boxes_of_images_by_order_of_reading_new:
 simplify, avoid changing dtype during np.append

---
 src/eynollah/eynollah.py       |   2 +-
 src/eynollah/utils/__init__.py | 214 +++++++++++++++------------------
 2 files changed, 97 insertions(+), 119 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 55789ae..959e9a6 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -3678,7 +3678,7 @@ class Eynollah:
                 for region in all_found_textline_polygons]
 
     def dilate_textregions_contours(self, all_found_textline_polygons):
-        return [np.array(make_valid(Polygon(poly[:, 0])).buffer(5).exterior.coords,
+        return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords,
                          dtype=int)[:, np.newaxis]
                 for poly in all_found_textline_polygons]
 
diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py
index c5962f8..7168d95 100644
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -1632,6 +1632,7 @@ def return_boxes_of_images_by_order_of_reading_new(
         regions_without_separators = cv2.flip(regions_without_separators,1)
     boxes=[]
     peaks_neg_tot_tables = []
+    splitter_y_new = np.array(splitter_y_new, dtype=int)
     for i in range(len(splitter_y_new)-1):
         #print(splitter_y_new[i],splitter_y_new[i+1])
         matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) &
@@ -1644,14 +1645,9 @@ def return_boxes_of_images_by_order_of_reading_new(
         #    0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))):
         if True:
             try:
-                if erosion_hurts:
-                    num_col, peaks_neg_fin = find_num_col(
-                        regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
-                        num_col_classifier, tables, multiplier=6.)
-                else:
-                    num_col, peaks_neg_fin = find_num_col(
-                        regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
-                        num_col_classifier, tables, multiplier=7.)
+                num_col, peaks_neg_fin = find_num_col(
+                    regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
+                    num_col_classifier, tables, multiplier=6. if erosion_hurts else 7.)
             except:
                 peaks_neg_fin=[]
                 num_col = 0
@@ -1661,7 +1657,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                     #print('burda')
                     if len(peaks_neg_fin)==0:
                         num_col, peaks_neg_fin = find_num_col(
-                            regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
+                            regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
                             num_col_classifier, tables, multiplier=3.)
                     peaks_neg_fin_early=[]
                     peaks_neg_fin_early.append(0)
@@ -1674,21 +1670,21 @@ def return_boxes_of_images_by_order_of_reading_new(
                     peaks_neg_fin_rev=[]
                     for i_n in range(len(peaks_neg_fin_early)-1):
                         #print(i_n,'i_n')
-                        #plt.plot(regions_without_separators[int(splitter_y_new[i]):
-                        #                                    int(splitter_y_new[i+1]),
+                        #plt.plot(regions_without_separators[splitter_y_new[i]:
+                        #                                    splitter_y_new[i+1],
                         #                                    peaks_neg_fin_early[i_n]:
                         #                                    peaks_neg_fin_early[i_n+1]].sum(axis=0) )
                         #plt.show()
                         try:
                             num_col, peaks_neg_fin1 = find_num_col(
-                                regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
+                                regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
                                                            peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
                                 num_col_classifier,tables, multiplier=7.)
                         except:
                             peaks_neg_fin1=[]
                         try:
                             num_col, peaks_neg_fin2 = find_num_col(
-                                regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
+                                regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
                                                            peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
                                 num_col_classifier,tables, multiplier=5.)
                         except:
@@ -1716,7 +1712,7 @@ def return_boxes_of_images_by_order_of_reading_new(
             except:
                 pass
             #num_col, peaks_neg_fin = find_num_col(
-            #    regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
+            #    regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],:],
             #    multiplier=7.0)
             x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
             x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
@@ -1738,31 +1734,28 @@ def return_boxes_of_images_by_order_of_reading_new(
                 y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
                 new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
                     x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff)
-            x_starting = np.array(x_starting)
-            x_ending = np.array(x_ending)
-            y_type_2 = np.array(y_type_2)
-            y_diff_type_2 = np.array(y_diff_type_2)
 
+            all_columns = set(range(len(peaks_neg_tot) - 1))
             if ((reading_order_type==1) or
                 (reading_order_type==0 and
                  (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))):
                 try:
-                    y_grenze=int(splitter_y_new[i])+300
+                    y_grenze = splitter_y_new[i] + 300
                     #check if there is a big separator in this y_mains_sep_ohne_grenzen
 
                     args_early_ys=np.arange(len(y_type_2))
                     #print(args_early_ys,'args_early_ys')
-                    #print(int(splitter_y_new[i]),int(splitter_y_new[i+1]))
+                    #print(splitter_y_new[i], splitter_y_new[i+1])
 
-                    x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) &
+                    x_starting_up = x_starting[(y_type_2 > splitter_y_new[i]) &
                                                (y_type_2 <= y_grenze)]
-                    x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) &
+                    x_ending_up = x_ending[(y_type_2 > splitter_y_new[i]) &
                                            (y_type_2 <= y_grenze)]
-                    y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                    y_type_2_up = y_type_2[(y_type_2 > splitter_y_new[i]) &
                                            (y_type_2 <= y_grenze)]
-                    y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                    y_diff_type_2_up = y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
                                                      (y_type_2 <= y_grenze)]
-                    args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) &
+                    args_up = args_early_ys[(y_type_2 > splitter_y_new[i]) &
                                             (y_type_2 <= y_grenze)]
                     if len(y_type_2_up) > 0:
                         y_main_separator_up = y_type_2_up [(x_starting_up==0) &
@@ -1776,8 +1769,8 @@ def return_boxes_of_images_by_order_of_reading_new(
                             args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
                             #print(args_to_be_kept,'args_to_be_kept')
                             boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
-                                          int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))])
-                            splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0]
+                                          splitter_y_new[i], y_diff_main_separator_up.max()])
+                            splitter_y_new[i] = y_diff_main_separator_up.max()
 
                             #print(splitter_y_new[i],'splitter_y_new[i]')
                             y_type_2 = y_type_2[args_to_be_kept]
@@ -1786,29 +1779,28 @@ def return_boxes_of_images_by_order_of_reading_new(
                             y_diff_type_2 = y_diff_type_2[args_to_be_kept]
 
                             #print('galdiha')
-                            y_grenze=int(splitter_y_new[i])+200
+                            y_grenze = splitter_y_new[i] + 200
                             args_early_ys2=np.arange(len(y_type_2))
-                            y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                            y_type_2_up=y_type_2[(y_type_2 > splitter_y_new[i]) &
                                                  (y_type_2 <= y_grenze)]
-                            x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) &
+                            x_starting_up=x_starting[(y_type_2 > splitter_y_new[i]) &
                                                      (y_type_2 <= y_grenze)]
-                            x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) &
+                            x_ending_up=x_ending[(y_type_2 > splitter_y_new[i]) &
                                                  (y_type_2 <= y_grenze)]
-                            y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                            y_diff_type_2_up=y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
                                                            (y_type_2 <= y_grenze)]
-                            args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) &
+                            args_up2=args_early_ys2[(y_type_2 > splitter_y_new[i]) &
                                                     (y_type_2 <= y_grenze)]
                             #print(y_type_2_up,x_starting_up,x_ending_up,'didid')
-                            nodes_in = []
+                            nodes_in = set()
                             for ij in range(len(x_starting_up)):
-                                nodes_in = nodes_in + list(range(x_starting_up[ij],
-                                                                 x_ending_up[ij]))
-                            nodes_in = np.unique(nodes_in)
+                                nodes_in.update(range(x_starting_up[ij],
+                                                      x_ending_up[ij]))
                             #print(nodes_in,'nodes_in')
 
-                            if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
+                            if nodes_in == set(range(len(peaks_neg_tot)-1)):
                                 pass
-                            elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)):
+                            elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
                                 pass
                             else:
                                 #print('burdaydikh')
@@ -1823,17 +1815,16 @@ def return_boxes_of_images_by_order_of_reading_new(
                                     pass
                                 #print('burdaydikh2')
                         elif len(y_diff_main_separator_up)==0:
-                            nodes_in = []
+                            nodes_in = set()
                             for ij in range(len(x_starting_up)):
-                                nodes_in = nodes_in + list(range(x_starting_up[ij],
-                                                                 x_ending_up[ij]))
-                            nodes_in = np.unique(nodes_in)
+                                nodes_in.update(range(x_starting_up[ij],
+                                                      x_ending_up[ij]))
                             #print(nodes_in,'nodes_in2')
                             #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
 
-                            if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
+                            if nodes_in == set(range(len(peaks_neg_tot)-1)):
                                 pass
-                            elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)):
+                            elif nodes_in == set(range(1,len(peaks_neg_tot)-1)):
                                 pass
                             else:
                                 #print('burdaydikh')
@@ -1858,26 +1849,24 @@ def return_boxes_of_images_by_order_of_reading_new(
                     x_end_by_order=[]
                     if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1:
                         if reading_order_type==1:
-                            y_lines_by_order.append(int(splitter_y_new[i]))
+                            y_lines_by_order.append(splitter_y_new[i])
                             x_start_by_order.append(0)
                             x_end_by_order.append(len(peaks_neg_tot)-2)
                         else:
                             #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                            columns_covered_by_mothers = []
+                            columns_covered_by_mothers = set()
                             for dj in range(len(x_start_without_mother)):
-                                columns_covered_by_mothers = columns_covered_by_mothers + \
-                                    list(range(x_start_without_mother[dj],
-                                               x_end_without_mother[dj]))
-                            columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                            all_columns=np.arange(len(peaks_neg_tot)-1)
-                            columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
-                            y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
-                            ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                                columns_covered_by_mothers.update(
+                                    range(x_start_without_mother[dj],
+                                          x_end_without_mother[dj]))
+                            columns_not_covered = list(all_columns - columns_covered_by_mothers)
+                            y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother),
+                                                                   dtype=int) * splitter_y_new[i])
+                            ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                             ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                            x_starting = np.append(x_starting, columns_not_covered)
+                            x_starting = np.append(x_starting, np.array(columns_not_covered, int))
                             x_starting = np.append(x_starting, x_start_without_mother)
-                            x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                            x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                             x_ending = np.append(x_ending, x_end_without_mother)
 
                         ind_args=np.arange(len(y_type_2))
@@ -1906,39 +1895,34 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 x_end_by_order.append(x_end_column_sort[ii]-1)
                     else:
                         #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                        columns_covered_by_mothers = []
+                        columns_covered_by_mothers = set()
                         for dj in range(len(x_start_without_mother)):
-                            columns_covered_by_mothers = columns_covered_by_mothers + \
-                                list(range(x_start_without_mother[dj],
-                                           x_end_without_mother[dj]))
-                        columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                        all_columns=np.arange(len(peaks_neg_tot)-1)
-                        columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
-                        y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
-                        ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                            columns_covered_by_mothers.update(
+                                range(x_start_without_mother[dj],
+                                      x_end_without_mother[dj]))
+                        columns_not_covered = list(all_columns - columns_covered_by_mothers)
+                        y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother),
+                                                               dtype=int) * splitter_y_new[i])
+                        ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                         ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                        x_starting = np.append(x_starting, columns_not_covered)
+                        x_starting = np.append(x_starting, np.array(columns_not_covered, int))
                         x_starting = np.append(x_starting, x_start_without_mother)
-                        x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                        x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                         x_ending = np.append(x_ending, x_end_without_mother)
 
-                        columns_covered_by_with_child_no_mothers = []
+                        columns_covered_by_with_child_no_mothers = set()
                         for dj in range(len(x_end_with_child_without_mother)):
-                            columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \
-                                list(range(x_start_with_child_without_mother[dj],
-                                           x_end_with_child_without_mother[dj]))
-                        columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers))
-
-                        all_columns = np.arange(len(peaks_neg_tot)-1)
-                        columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers))
+                            columns_covered_by_with_child_no_mothers.update(
+                                range(x_start_with_child_without_mother[dj],
+                                      x_end_with_child_without_mother[dj]))
+                        columns_not_covered_child_no_mother = list(all_columns - columns_covered_by_with_child_no_mothers)
                         #indexes_to_be_spanned=[]
                         for i_s in range(len(x_end_with_child_without_mother)):
                             columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
                         columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother)
                         ind_args = np.arange(len(y_type_2))
-                        x_end_with_child_without_mother = np.array(x_end_with_child_without_mother)
-                        x_start_with_child_without_mother = np.array(x_start_with_child_without_mother)
+                        x_end_with_child_without_mother = np.array(x_end_with_child_without_mother, int)
+                        x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int)
                         for i_s_nc in columns_not_covered_child_no_mother:
                             if i_s_nc in x_start_with_child_without_mother:
                                 x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
@@ -1951,7 +1935,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 for i_c in range(len(y_column_nc)):
                                     if i_c==(len(y_column_nc)-1):
                                         ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) &
-                                                                              (y_type_2<int(splitter_y_new[i+1])) &
+                                                                              (y_type_2<splitter_y_new[i+1]) &
                                                                               (x_starting>=i_s_nc) &
                                                                               (x_ending<=x_end_biggest_column)]
                                     else:
@@ -1967,21 +1951,19 @@ def return_boxes_of_images_by_order_of_reading_new(
                                     if len(x_diff_all_between_nm_wc)>0:
                                         biggest=np.argmax(x_diff_all_between_nm_wc)
 
-                                    columns_covered_by_mothers = []
+                                    columns_covered_by_mothers = set()
                                     for dj in range(len(x_starting_all_between_nm_wc)):
-                                        columns_covered_by_mothers = columns_covered_by_mothers + \
-                                            list(range(x_starting_all_between_nm_wc[dj],
-                                                       x_ending_all_between_nm_wc[dj]))
-                                    columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                                    all_columns=np.arange(i_s_nc, x_end_biggest_column)
-                                    columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
+                                        columns_covered_by_mothers.update(
+                                            range(x_starting_all_between_nm_wc[dj],
+                                                  x_ending_all_between_nm_wc[dj]))
+                                    child_columns = set(range(i_s_nc, x_end_biggest_column))
+                                    columns_not_covered = list(child_columns - columns_covered_by_mothers)
 
                                     should_longest_line_be_extended=0
                                     if (len(x_diff_all_between_nm_wc) > 0 and
                                         set(list(range(x_starting_all_between_nm_wc[biggest],
                                                         x_ending_all_between_nm_wc[biggest])) +
-                                            list(columns_not_covered)) != set(all_columns)):
+                                            list(columns_not_covered)) != child_columns):
                                         should_longest_line_be_extended=1
                                         index_lines_so_close_to_top_separator = \
                                             np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) &
@@ -2008,8 +1990,8 @@ def return_boxes_of_images_by_order_of_reading_new(
                                             pass
 
                                     y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
-                                    x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered)
-                                    x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1)
+                                    x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
+                                    x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
 
                                     ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
                                     for column in range(i_s_nc, x_end_biggest_column):
@@ -2078,7 +2060,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                             if len(y_in_cols)>0:
                                 y_down=np.min(y_in_cols)
                             else:
-                                y_down=[int(splitter_y_new[i+1])][0]
+                                y_down=splitter_y_new[i+1]
                             #print(y_itself,'y_itself')
                             boxes.append([peaks_neg_tot[column],
                                           peaks_neg_tot[column+1],
@@ -2086,45 +2068,42 @@ def return_boxes_of_images_by_order_of_reading_new(
                                           y_down])
                 except:
                     boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
-                                  int(splitter_y_new[i]), int(splitter_y_new[i+1])])
+                                  splitter_y_new[i], splitter_y_new[i+1]])
             else:
                 y_lines_by_order=[]
                 x_start_by_order=[]
                 x_end_by_order=[]
                 if len(x_starting)>0:
-                    all_columns = np.arange(len(peaks_neg_tot)-1)
-                    columns_covered_by_lines_covered_more_than_2col = []
+                    columns_covered_by_lines_covered_more_than_2col = set()
                     for dj in range(len(x_starting)):
-                        if set(list(range(x_starting[dj],x_ending[dj]))) == set(all_columns):
-                            pass
-                        else:
-                            columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \
-                                list(range(x_starting[dj],x_ending[dj]))
-                    columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col))
-                    columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col))
+                        if set(range(x_starting[dj], x_ending[dj])) != all_columns:
+                            columns_covered_by_lines_covered_more_than_2col.update(
+                                range(x_starting[dj], x_ending[dj]))
+                    columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col)
 
-                    y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
-                    ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                    y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + 1,
+                                                           dtype=int) * splitter_y_new[i])
+                    ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                     ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                    x_starting = np.append(x_starting, columns_not_covered)
-                    x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                    x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                    x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
                     if len(new_main_sep_y) > 0:
                         x_starting = np.append(x_starting, 0)
-                        x_ending = np.append(x_ending, len(peaks_neg_tot)-1)
+                        x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
                     else:
                         x_starting = np.append(x_starting, x_starting[0])
                         x_ending = np.append(x_ending, x_ending[0])
                 else:
-                    all_columns = np.arange(len(peaks_neg_tot)-1)
-                    columns_not_covered = list(set(all_columns))
-                    y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
-                    ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                    columns_not_covered = list(all_columns)
+                    y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered),
+                                                           dtype=int) * splitter_y_new[i])
+                    ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                     ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                    x_starting = np.append(x_starting, columns_not_covered)
-                    x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                    x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                    x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
 
-                ind_args=np.array(range(len(y_type_2)))
-                #ind_args=np.array(ind_args)
+                ind_args = np.arange(len(y_type_2))
+                
                 for column in range(len(peaks_neg_tot)-1):
                     #print(column,'column')
                     ind_args_in_col=ind_args[x_starting==column]
@@ -2155,7 +2134,6 @@ def return_boxes_of_images_by_order_of_reading_new(
                     x_start_itself=x_start_copy.pop(il)
                     x_end_itself=x_end_copy.pop(il)
 
-                    #print(y_copy,'y_copy2')
                     for column in range(x_start_itself, x_end_itself+1):
                         #print(column,'cols')
                         y_in_cols=[]
@@ -2170,7 +2148,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                         if len(y_in_cols)>0:
                             y_down=np.min(y_in_cols)
                         else:
-                            y_down=[int(splitter_y_new[i+1])][0]
+                            y_down=splitter_y_new[i+1]
                         #print(y_itself,'y_itself')
                         boxes.append([peaks_neg_tot[column],
                                       peaks_neg_tot[column+1],

From 66b2bce8b9f420895b8c47ebf46faf1ca3bbdd03 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Fri, 19 Sep 2025 12:19:58 +0200
Subject: [PATCH 03/41] return_boxes_of_images_by_order_of_reading_new: log any
 exceptions

---
 src/eynollah/eynollah.py       |  6 ++++--
 src/eynollah/utils/__init__.py | 22 ++++++++++++++++------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 959e9a6..8080035 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -4553,11 +4553,13 @@ class Eynollah:
                 if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                     boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
                         splitter_y_new, regions_without_separators, matrix_of_lines_ch,
-                        num_col_classifier, erosion_hurts, self.tables, self.right2left)
+                        num_col_classifier, erosion_hurts, self.tables, self.right2left,
+                        logger=self.logger)
                 else:
                     boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
                         splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d,
-                        num_col_classifier, erosion_hurts, self.tables, self.right2left)
+                        num_col_classifier, erosion_hurts, self.tables, self.right2left,
+                        logger=self.logger)
 
         if self.plotter:
             self.plotter.write_images_into_directory(polygons_of_images, image_page)
diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py
index 7168d95..3c130d7 100644
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+from logging import getLogger
 import time
 import math
 
@@ -1626,10 +1628,16 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
 def return_boxes_of_images_by_order_of_reading_new(
         splitter_y_new, regions_without_separators,
         matrix_of_lines_ch,
-        num_col_classifier, erosion_hurts, tables, right2left_readingorder):
+        num_col_classifier, erosion_hurts, tables,
+        right2left_readingorder,
+        logger=None):
 
     if right2left_readingorder:
         regions_without_separators = cv2.flip(regions_without_separators,1)
+    if logger is None:
+        logger = getLogger(__package__)
+    logger.debug('enter return_boxes_of_images_by_order_of_reading_new')
+
     boxes=[]
     peaks_neg_tot_tables = []
     splitter_y_new = np.array(splitter_y_new, dtype=int)
@@ -1710,7 +1718,7 @@ def return_boxes_of_images_by_order_of_reading_new(
 
                     #print(peaks_neg_fin,'peaks_neg_fin')
             except:
-                pass
+                logger.exception("cannot find peaks consistent with columns")
             #num_col, peaks_neg_fin = find_num_col(
             #    regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],:],
             #    multiplier=7.0)
@@ -1987,7 +1995,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                                             x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
                                             x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
                                         except:
-                                            pass
+                                            logger.exception("cannot append")
 
                                     y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
                                     x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
@@ -2067,6 +2075,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                                           y_itself,
                                           y_down])
                 except:
+                    logger.exception("cannot assign boxes")
                     boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
                                   splitter_y_new[i], splitter_y_new[i+1]])
             else:
@@ -2170,6 +2179,7 @@ def return_boxes_of_images_by_order_of_reading_new(
             x_end_new = regions_without_separators.shape[1] - boxes[i][0]
             boxes[i][0] = x_start_new
             boxes[i][1] = x_end_new
-        return boxes, peaks_neg_tot_tables_new
-    else:
-        return boxes, peaks_neg_tot_tables
+        peaks_neg_tot_tables = peaks_neg_tot_tables_new
+
+    logger.debug('exit return_boxes_of_images_by_order_of_reading_new')
+    return boxes, peaks_neg_tot_tables

From afba70c920b4f1dc80bd70511a07df82439e6db3 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 19 Aug 2025 22:56:36 +0200
Subject: [PATCH 04/41] separate_lines/do_work_of_slopes: skip if crop is empty

---
 src/eynollah/utils/separate_lines.py | 46 +++++++++++++++-------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index 0322579..ffbfff7 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -1345,24 +1345,26 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
 
     return contours_rotated_clean
 
-def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None):
+def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None):
     if logger is None:
         logger = getLogger(__package__)
+    if not np.prod(img_crop.shape):
+        return img_crop
 
     if num_col == 1:
-        num_patches = int(img_path.shape[1] / 200.0)
+        num_patches = int(img_crop.shape[1] / 200.0)
     else:
-        num_patches = int(img_path.shape[1] / 140.0)
-    # num_patches=int(img_path.shape[1]/200.)
+        num_patches = int(img_crop.shape[1] / 140.0)
+    # num_patches=int(img_crop.shape[1]/200.)
     if num_patches == 0:
         num_patches = 1
 
-    img_patch_ineterst = img_path[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
+    img_patch_interest = img_crop[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
 
-    # plt.imshow(img_patch_ineterst)
+    # plt.imshow(img_patch_interest)
     # plt.show()
 
-    length_x = int(img_path.shape[1] / float(num_patches))
+    length_x = int(img_crop.shape[1] / float(num_patches))
     # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2
     margin = int(0.04 * length_x)
     # if margin<=4:
@@ -1370,7 +1372,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
     # margin=0
 
     width_mid = length_x - 2 * margin
-    nxf = img_path.shape[1] / float(width_mid)
+    nxf = img_crop.shape[1] / float(width_mid)
 
     if nxf > int(nxf):
         nxf = int(nxf) + 1
@@ -1386,12 +1388,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
             index_x_d = i * width_mid
             index_x_u = index_x_d + length_x
 
-        if index_x_u > img_path.shape[1]:
-            index_x_u = img_path.shape[1]
-            index_x_d = img_path.shape[1] - length_x
+        if index_x_u > img_crop.shape[1]:
+            index_x_u = img_crop.shape[1]
+            index_x_d = img_crop.shape[1] - length_x
 
         # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
-        img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
+        img_xline = img_patch_interest[:, index_x_d:index_x_u]
 
         try:
             assert img_xline.any()
@@ -1407,9 +1409,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
         img_line_rotated = rotate_image(img_xline, slope_xline)
         img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
         
-    img_patch_ineterst = img_path[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
+    img_patch_interest = img_crop[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
 
-    img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape)
+    img_patch_interest_revised = np.zeros(img_patch_interest.shape)
 
     for i in range(nxf):
         if i == 0:
@@ -1419,11 +1421,11 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
             index_x_d = i * width_mid
             index_x_u = index_x_d + length_x
 
-        if index_x_u > img_path.shape[1]:
-            index_x_u = img_path.shape[1]
-            index_x_d = img_path.shape[1] - length_x
+        if index_x_u > img_crop.shape[1]:
+            index_x_u = img_crop.shape[1]
+            index_x_d = img_crop.shape[1] - length_x
 
-        img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
+        img_xline = img_patch_interest[:, index_x_d:index_x_u]
 
         img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
         img_int[:, :] = img_xline[:, :]  # img_patch_org[:,:,0]
@@ -1446,9 +1448,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
             int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]]
 
         img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
-        img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
+        img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
 
-    return img_patch_ineterst_revised
+    return img_patch_interest_revised
 
 def do_image_rotation(angle, img, sigma_des, logger=None):
     if logger is None:
@@ -1546,7 +1548,7 @@ def do_work_of_slopes_new(
     img_int_p = all_text_region_raw[:,:]
     img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2)
 
-    if img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
+    if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
         slope = 0
         slope_for_all = slope_deskew
         all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w]
@@ -1603,7 +1605,7 @@ def do_work_of_slopes_new_curved(
     # plt.imshow(img_int_p)
     # plt.show()
 
-    if img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
+    if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
         slope = 0
         slope_for_all = slope_deskew
     else:

From 41cc38c51aaa74fb27854a101e9fbe727478f86b Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 20 Aug 2025 14:28:14 +0200
Subject: [PATCH 05/41] get_textregion_contours_in_org_image_light: no back
 rotation, drop slope_first (always 0)

---
 src/eynollah/eynollah.py      | 14 ++++++--------
 src/eynollah/utils/contour.py | 26 +++++++++++---------------
 2 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 8080035..49f6b33 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -2927,12 +2927,10 @@ class Eynollah:
         #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew')
         slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True,
                                           map=self.executor.map, logger=self.logger, plotter=self.plotter)
-        slope_first = 0
-
         if self.plotter:
             self.plotter.save_deskewed_image(slope_deskew)
         self.logger.info("slope_deskew: %.2f°", slope_deskew)
-        return slope_deskew, slope_first
+        return slope_deskew
 
     def run_marginals(
             self, image_page, textline_mask_tot_ea, mask_images, mask_lines,
@@ -4173,9 +4171,9 @@ class Eynollah:
 
                 textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
 
-                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew)
+                slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew)
             else:
-                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
+                slope_deskew = self.run_deskew(textline_mask_tot_ea)
             #print("text region early -2,5 in %.1fs", time.time() - t0)
             #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
             num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
@@ -4216,7 +4214,7 @@ class Eynollah:
             textline_mask_tot_ea = self.run_textline(image_page)
             self.logger.info("textline detection took %.1fs", time.time() - t1)
             t1 = time.time()
-            slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
+            slope_deskew = self.run_deskew(textline_mask_tot_ea)
             self.logger.info("deskewing took %.1fs", time.time() - t1)
         elif num_col_classifier in (1,2):
             org_h_l_m = textline_mask_tot_ea.shape[0]
@@ -4405,12 +4403,12 @@ class Eynollah:
                 contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals)
             #print("text region early 3.5 in %.1fs", time.time() - t0)
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
-                contours_only_text_parent, self.image, slope_first, confidence_matrix,  map=self.executor.map)
+                contours_only_text_parent, self.image, confidence_matrix)
             #txt_con_org = self.dilate_textregions_contours(txt_con_org)
             #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
         else:
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
-                contours_only_text_parent, self.image, slope_first, confidence_matrix,  map=self.executor.map)
+                contours_only_text_parent, self.image, confidence_matrix)
         #print("text region early 4 in %.1fs", time.time() - t0)
         boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
         boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 3d7e5c8..249748a 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -247,23 +247,19 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
         cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
     return cont_int[0], index_r_con, confidence_contour
 
-def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map):
+def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
     if not len(cnts):
         return [], []
-    
-    confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
-    img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
-    ##cnts = list( (np.array(cnts)/2).astype(np.int16) )
-    #cnts = cnts/2
-    cnts = [(i/6).astype(int) for i in cnts]
-    results = map(partial(do_back_rotation_and_get_cnt_back,
-                          img=img,
-                          slope_first=slope_first,
-                          confidence_matrix=confidence_matrix,
-                          ),
-                  cnts, range(len(cnts)))
-    contours, indexes, conf_contours = tuple(zip(*results))
-    return [i*6 for i in contours], list(conf_contours)
+
+    confidence_matrix = cv2.resize(confidence_matrix,
+                                   (img.shape[1] // 6, img.shape[0] // 6),
+                                   interpolation=cv2.INTER_NEAREST)
+    confs = []
+    for cnt in cnts:
+        cnt_mask = np.zeros(confidence_matrix.shape)
+        cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0)
+        confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
+    return cnts, confs
 
 def return_contours_of_interested_textline(region_pre_p, pixel):
     # pixels of images are identified by 5

From 7b51fd662497ecd7c35b09764df2ed5c6b651a76 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 21 Aug 2025 01:03:46 +0200
Subject: [PATCH 06/41] avoid creating invalid polygons via rounding

---
 src/eynollah/eynollah.py      | 5 +++--
 src/eynollah/utils/contour.py | 9 +++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 49f6b33..0f458b4 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -3670,16 +3670,17 @@ class Eynollah:
         return x_differential_new
 
     def dilate_textregions_contours_textline_version(self, all_found_textline_polygons):
-        return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords,
+        return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
                           dtype=int)[:, np.newaxis]
                  for poly in region]
                 for region in all_found_textline_polygons]
 
     def dilate_textregions_contours(self, all_found_textline_polygons):
-        return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords,
+        return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
                          dtype=int)[:, np.newaxis]
                 for poly in all_found_textline_polygons]
 
+
     def dilate_textline_contours(self, all_found_textline_polygons):
         for j in range(len(all_found_textline_polygons)):
             for ij in range(len(all_found_textline_polygons[j])):
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 249748a..8205c2b 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -49,7 +49,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area
             area <= max_area * np.prod(image.shape[:2]) and
             hierarchy[0][jv][3] == -1):
             found_polygons_early.append(np.array([[point]
-                                                  for point in polygon.exterior.coords], dtype=np.uint))
+                                                  for point in polygon.exterior.coords[:-1]], dtype=np.uint))
     return found_polygons_early
 
 def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
@@ -70,7 +70,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
             True):
             # print(c[0][0][1])
             found_polygons_early.append(np.array([[point]
-                                                  for point in polygon.exterior.coords], dtype=np.int32))
+                                                  for point in polygon.exterior.coords[:-1]], dtype=np.int32))
     return found_polygons_early
 
 def find_new_features_of_contours(contours_main):
@@ -330,6 +330,11 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
 
 def make_valid(polygon: Polygon) -> Polygon:
     """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
+    def isint(x):
+        return isinstance(x, int) or int(x) == x
+    # make sure rounding does not invalidate
+    if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0:
+        polygon = Polygon(np.round(polygon.exterior.coords))
     points = list(polygon.exterior.coords)
     # try by re-arranging points
     for split in range(1, len(points)):

From e730725da3d40cfbd20f857c36843190713725ca Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 21 Aug 2025 01:05:15 +0200
Subject: [PATCH 07/41] 
 check_any_text_region_in_model_one_is_main_or_header_light: return original
 instead of resampled contours

---
 src/eynollah/utils/__init__.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py
index 3c130d7..c479744 100644
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -957,11 +957,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom,
                                                          regions_model_full.shape[0] // zoom),
                                     interpolation=cv2.INTER_NEAREST)
-    contours_only_text_parent = [(i / zoom).astype(int) for i in  contours_only_text_parent]
+    contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent]
 
     ###
     cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
-        find_new_features_of_contours(contours_only_text_parent)
+        find_new_features_of_contours(contours_only_text_parent_z)
 
     length_con=x_max_main-x_min_main
     height_con=y_max_main-y_min_main
@@ -984,8 +984,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     contours_only_text_parent_main_d=[]
     contours_only_text_parent_head_d=[]
 
-    for ii in range(len(contours_only_text_parent)):
-        con=contours_only_text_parent[ii]
+    for ii, con in enumerate(contours_only_text_parent_z):
         img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3))
         img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))
 
@@ -996,23 +995,22 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
 
         if (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ):
             regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
-            contours_only_text_parent_head.append(con)
+            contours_only_text_parent_head.append(contours_only_text_parent[ii])
+            conf_contours_head.append(None) # why not conf_contours[ii], too?
             if contours_only_text_parent_d_ordered is not None:
                 contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
             all_box_coord_head.append(all_box_coord[ii])
             slopes_head.append(slopes[ii])
             all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
-            conf_contours_head.append(None)
         else:
             regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
-            contours_only_text_parent_main.append(con)
+            contours_only_text_parent_main.append(contours_only_text_parent[ii])
             conf_contours_main.append(conf_contours[ii])
             if contours_only_text_parent_d_ordered is not None:
                 contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
             all_box_coord_main.append(all_box_coord[ii])
             slopes_main.append(slopes[ii])
             all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
-
         #print(all_pixels,pixels_main,pixels_header)
 
     ### to make it faster
@@ -1020,8 +1018,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
     #                                       regions_model_full.shape[0] // zoom),
     #                                 interpolation=cv2.INTER_NEAREST)
-    contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
-    contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
     ###
 
     return (regions_model_1,

From 17bcf1af71802d790f7508d52221d64ea4fff939 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 21 Aug 2025 01:32:32 +0200
Subject: [PATCH 08/41] =?UTF-8?q?rename=20*lines=5Fxml=20=E2=86=92=20*sepl?=
 =?UTF-8?q?ines=20for=20clarity?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/eynollah/eynollah.py | 58 ++++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 0f458b4..c04c481 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -1713,9 +1713,9 @@ class Eynollah:
         mask_texts_only = (prediction_regions_org[:,:] ==1)*1
         mask_images_only=(prediction_regions_org[:,:] ==2)*1
 
-        polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
-        polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(
-            mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
+        polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+        polygons_seplines = textline_con_fil = filter_contours_area_of_image(
+            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001)
 
         polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
         polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
@@ -1779,7 +1779,7 @@ class Eynollah:
                                                         [page_coord_img[2], page_coord_img[1]]]))
 
         self.logger.debug("exit get_regions_extract_images_only")
-        return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
+        return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_images_fin, image_page, page_coord, cont_page
 
     def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
         self.logger.debug("enter get_regions_light_v")
@@ -1895,24 +1895,24 @@ class Eynollah:
             mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
             mask_images_only=(prediction_regions_org[:,:] ==2)*1
 
-            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
+            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
             test_khat = np.zeros(prediction_regions_org.shape)
-            test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1))
+            test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1))
 
             #plt.imshow(test_khat[:,:])
             #plt.show()
             #for jv in range(1):
-                #print(jv, hir_lines_xml[0][232][3])
+                #print(jv, hir_seplines[0][232][3])
                 #test_khat = np.zeros(prediction_regions_org.shape)
-                #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1))
+                #test_khat = cv2.fillPoly(test_khat, pts = [polygons_seplines[232]], color=(1,1,1))
                 #plt.imshow(test_khat[:,:])
                 #plt.show()
 
-            polygons_lines_xml = filter_contours_area_of_image(
-                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
+            polygons_seplines = filter_contours_area_of_image(
+                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001)
 
             test_khat = np.zeros(prediction_regions_org.shape)
-            test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1))
+            test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1))
 
             #plt.imshow(test_khat[:,:])
             #plt.show()
@@ -1937,7 +1937,7 @@ class Eynollah:
             #plt.show()
             #print("inside 4 ", time.time()-t_in)
             self.logger.debug("exit get_regions_light_v")
-            return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix
+            return text_regions_p_true, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin, confidence_matrix
         else:
             img_bin = resize_image(img_bin,img_height_h, img_width_h )
             self.logger.debug("exit get_regions_light_v")
@@ -2020,9 +2020,9 @@ class Eynollah:
             mask_texts_only=(prediction_regions_org[:,:]==1)*1
             mask_images_only=(prediction_regions_org[:,:]==2)*1
 
-            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
-            polygons_lines_xml = filter_contours_area_of_image(
-                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
+            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+            polygons_seplines = filter_contours_area_of_image(
+                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001)
 
             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)
@@ -2034,7 +2034,7 @@ class Eynollah:
             text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
 
             self.logger.debug("exit get_regions_from_xy_2models")
-            return text_regions_p_true, erosion_hurts, polygons_lines_xml
+            return text_regions_p_true, erosion_hurts, polygons_seplines
         except:
             if self.input_binary:
                 prediction_bin = np.copy(img_org)
@@ -2069,9 +2069,9 @@ class Eynollah:
             mask_texts_only = (prediction_regions_org == 1)*1
             mask_images_only= (prediction_regions_org == 2)*1
 
-            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
-            polygons_lines_xml = filter_contours_area_of_image(
-                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
+            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+            polygons_seplines = filter_contours_area_of_image(
+                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001)
 
             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
@@ -2084,7 +2084,7 @@ class Eynollah:
 
             erosion_hurts = True
             self.logger.debug("exit get_regions_from_xy_2models")
-            return text_regions_p_true, erosion_hurts, polygons_lines_xml
+            return text_regions_p_true, erosion_hurts, polygons_seplines
 
     def do_order_of_regions_full_layout(
             self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
@@ -4102,7 +4102,7 @@ class Eynollah:
         img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
         self.logger.info("Enhancing took %.1fs ", time.time() - t0)
         if self.extract_only_images:
-            text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \
+            text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, image_page, page_coord, cont_page = \
                 self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
             ocr_all_textlines = None
             pcgts = self.writer.build_pagexml_no_full_layout(
@@ -4145,7 +4145,7 @@ class Eynollah:
             polygons_of_marginals = []
             all_found_textline_polygons_marginals = []
             all_box_coord_marginals = []
-            polygons_lines_xml = []
+            polygons_seplines = []
             contours_tables = []
             ocr_all_textlines = None
             conf_contours_textregions =None
@@ -4153,13 +4153,13 @@ class Eynollah:
                 cont_page, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
                 all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
-                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
+                cont_page, polygons_seplines, contours_tables, ocr_all_textlines, conf_contours_textregions)
             return pcgts
 
         #print("text region early -1 in %.1fs", time.time() - t0)
         t1 = time.time()
         if self.light_version:
-            text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
+            text_regions_p_1, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
             #print("text region early -2 in %.1fs", time.time() - t0)
 
@@ -4186,7 +4186,7 @@ class Eynollah:
             textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
             #print("text region early -4 in %.1fs", time.time() - t0)
         else:
-            text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \
+            text_regions_p_1, erosion_hurts, polygons_seplines = \
                 self.get_regions_from_xy_2models(img_res, is_image_enhanced,
                                                  num_col_classifier)
             self.logger.info("Textregion detection took %.1fs ", time.time() - t1)
@@ -4385,13 +4385,13 @@ class Eynollah:
                     [], [], page_coord, [], [], [], [], [], [],
                     polygons_of_images, contours_tables, [],
                     polygons_of_marginals, empty_marginals, empty_marginals, [], [], [],
-                    cont_page, polygons_lines_xml, [], [], [])
+                    cont_page, polygons_seplines, [], [], [])
             else:
                 pcgts = self.writer.build_pagexml_no_full_layout(
                     [], page_coord, [], [], [], [],
                     polygons_of_images,
                     polygons_of_marginals, empty_marginals, empty_marginals, [], [],
-                    cont_page, polygons_lines_xml, contours_tables, [], [])
+                    cont_page, polygons_seplines, contours_tables, [], [])
             return pcgts
 
 
@@ -4586,7 +4586,7 @@ class Eynollah:
                 all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                 polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals,
                 all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals,
-                cont_page, polygons_lines_xml, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h)
+                cont_page, polygons_seplines, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h)
             return pcgts
 
         contours_only_text_parent_h = None
@@ -4665,7 +4665,7 @@ class Eynollah:
             txt_con_org, page_coord, order_text_new, id_of_texts_tot,
             all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals,
             all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
-            cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
+            cont_page, polygons_seplines, contours_tables, ocr_all_textlines, conf_contours_textregions)
         return pcgts
 
 

From a433c736281dcf86630f80bfa686064814b313d9 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 21 Aug 2025 01:33:16 +0200
Subject: [PATCH 09/41] filter_contours_area_of_image*: also ensure validity
 here

---
 src/eynollah/eynollah.py      | 4 ++--
 src/eynollah/utils/contour.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index c04c481..7b3b81a 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -3671,13 +3671,13 @@ class Eynollah:
 
     def dilate_textregions_contours_textline_version(self, all_found_textline_polygons):
         return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
-                          dtype=int)[:, np.newaxis]
+                          dtype=np.uint)[:, np.newaxis]
                  for poly in region]
                 for region in all_found_textline_polygons]
 
     def dilate_textregions_contours(self, all_found_textline_polygons):
         return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
-                         dtype=int)[:, np.newaxis]
+                         dtype=np.uint)[:, np.newaxis]
                 for poly in all_found_textline_polygons]
 
 
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 8205c2b..03d45b7 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -48,8 +48,8 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area
         if (area >= min_area * np.prod(image.shape[:2]) and
             area <= max_area * np.prod(image.shape[:2]) and
             hierarchy[0][jv][3] == -1):
-            found_polygons_early.append(np.array([[point]
-                                                  for point in polygon.exterior.coords[:-1]], dtype=np.uint))
+            found_polygons_early.append(np.array(make_valid(polygon).exterior.coords[:-1],
+                                                 dtype=np.uint)[:, np.newaxis])
     return found_polygons_early
 
 def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
@@ -69,8 +69,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
             # hierarchy[0][jv][3]==-1
             True):
             # print(c[0][0][1])
-            found_polygons_early.append(np.array([[point]
-                                                  for point in polygon.exterior.coords[:-1]], dtype=np.int32))
+            found_polygons_early.append(np.array(make_valid(polygon).exterior.coords[:-1],
+                                                 dtype=np.uint)[:, np.newaxis])
     return found_polygons_early
 
 def find_new_features_of_contours(contours_main):

From 0650274ffad576acde6048822b5f74b6303ef689 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 21 Aug 2025 01:42:46 +0200
Subject: [PATCH 10/41] =?UTF-8?q?move=20dilate=5F*=5Fcontours=20to=20.util?=
 =?UTF-8?q?s.contour,=20rename=20dilate=5Ftextregions=5Fcontours=5Ftextlin?=
 =?UTF-8?q?e=5Fversion=20=E2=86=92=20dilate=5Ftextline=5Fcontours?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/eynollah/eynollah.py      | 253 ++--------------------------------
 src/eynollah/utils/contour.py |  11 ++
 2 files changed, 22 insertions(+), 242 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 7b3b81a..fe233cb 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -69,12 +69,13 @@ from .utils.contour import (
     get_text_region_boxes_by_given_contours,
     get_textregion_contours_in_org_image,
     get_textregion_contours_in_org_image_light,
-    make_valid,
     return_contours_of_image,
     return_contours_of_interested_region,
     return_contours_of_interested_region_by_min_size,
     return_contours_of_interested_textline,
     return_parent_contours,
+    dilate_textregion_contours,
+    dilate_textline_contours,
 )
 from .utils.rotate import (
     rotate_image,
@@ -1919,7 +1920,7 @@ class Eynollah:
             #sys.exit()
 
             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
-            ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts)
+            ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
 
             text_regions_p_true = np.zeros(prediction_regions_org.shape)
@@ -3669,117 +3670,6 @@ class Eynollah:
 
         return x_differential_new
 
-    def dilate_textregions_contours_textline_version(self, all_found_textline_polygons):
-        return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
-                          dtype=np.uint)[:, np.newaxis]
-                 for poly in region]
-                for region in all_found_textline_polygons]
-
-    def dilate_textregions_contours(self, all_found_textline_polygons):
-        return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
-                         dtype=np.uint)[:, np.newaxis]
-                for poly in all_found_textline_polygons]
-
-
-    def dilate_textline_contours(self, all_found_textline_polygons):
-        for j in range(len(all_found_textline_polygons)):
-            for ij in range(len(all_found_textline_polygons[j])):
-                con_ind = all_found_textline_polygons[j][ij]
-                area = cv2.contourArea(con_ind)
-
-                con_ind = con_ind.astype(float)
-
-                x_differential = np.diff( con_ind[:,0,0])
-                y_differential = np.diff( con_ind[:,0,1])
-
-                x_differential = gaussian_filter1d(x_differential, 3)
-                y_differential = gaussian_filter1d(y_differential, 3)
-
-                x_min = float(np.min( con_ind[:,0,0] ))
-                y_min = float(np.min( con_ind[:,0,1] ))
-
-                x_max = float(np.max( con_ind[:,0,0] ))
-                y_max = float(np.max( con_ind[:,0,1] ))
-
-                x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential]
-                y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential]
-
-                abs_diff=abs(abs(x_differential)- abs(y_differential) )
-
-                inc_x = np.zeros(len(x_differential)+1)
-                inc_y = np.zeros(len(x_differential)+1)
-
-                if (y_max-y_min) <= (x_max-x_min):
-                    dilation_m1 = round(area / (x_max-x_min) * 0.35)
-                else:
-                    dilation_m1 = round(area / (y_max-y_min) * 0.35)
-
-                if dilation_m1>12:
-                    dilation_m1 = 12
-                if dilation_m1<4:
-                    dilation_m1 = 4
-                #print(dilation_m1, 'dilation_m1')
-                dilation_m2 = int(dilation_m1/2.) +1
-
-                for i in range(len(x_differential)):
-                    if abs_diff[i]==0:
-                        inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i])
-                        inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i])
-                    elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0:
-                        inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i])
-                    elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0:
-                        inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i])
-
-                    elif abs_diff[i]!=0 and abs_diff[i]>=3:
-                        if abs(x_differential[i])>abs(y_differential[i]):
-                            inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i])
-                        else:
-                            inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i])
-                    else:
-                        inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i])
-                        inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i])
-
-                inc_x[0] = inc_x[-1]
-                inc_y[0] = inc_y[-1]
-
-                con_scaled = con_ind*1
-
-                con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:]
-                con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:]
-
-                con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0
-                con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0
-
-                con_ind = con_ind.astype(np.int32)
-
-                results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False)
-                           for ind in range(len(con_scaled[:,0, 1])) ]
-                results = np.array(results)
-                results[results==0] = 1
-
-                diff_result = np.diff(results)
-
-                indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2]
-                indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2]
-
-                if results[0]==1:
-                    con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1]
-                    con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0]
-                    indices_m2 = indices_m2[1:]
-
-                if len(indices_2)>len(indices_m2):
-                    con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1]
-                    con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0]
-                    indices_2 = indices_2[:-1]
-
-                for ii in range(len(indices_2)):
-                    con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1]
-                    con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0]
-
-                all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1]
-                all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0]
-        return all_found_textline_polygons
-
     def filter_contours_inside_a_bigger_one(self,contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"):
         if type_contour=="textregion":
             areas = [cv2.contourArea(contours[j]) for j in range(len(contours))]
@@ -3917,121 +3807,6 @@ class Eynollah:
 
         return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours)))
 
-    def dilate_textlines(self, all_found_textline_polygons):
-        for j in range(len(all_found_textline_polygons)):
-            for i in range(len(all_found_textline_polygons[j])):
-                con_ind = all_found_textline_polygons[j][i]
-                con_ind = con_ind.astype(float)
-
-                x_differential = np.diff( con_ind[:,0,0])
-                y_differential = np.diff( con_ind[:,0,1])
-
-                x_min = float(np.min( con_ind[:,0,0] ))
-                y_min = float(np.min( con_ind[:,0,1] ))
-
-                x_max = float(np.max( con_ind[:,0,0] ))
-                y_max = float(np.max( con_ind[:,0,1] ))
-
-                if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70:
-                    x_biger_than_x = np.abs(x_differential) > np.abs(y_differential)
-                    mult = x_biger_than_x*x_differential
-
-                    arg_min_mult = np.argmin(mult)
-                    arg_max_mult = np.argmax(mult)
-
-                    if y_differential[0]==0:
-                        y_differential[0] = 0.1
-                    if y_differential[-1]==0:
-                        y_differential[-1]= 0.1
-                    y_differential = [y_differential[ind] if y_differential[ind] != 0
-                                      else 0.5 * (y_differential[ind-1] + y_differential[ind+1])
-                                      for ind in range(len(y_differential))]
-
-                    if y_differential[0]==0.1:
-                        y_differential[0] = y_differential[1]
-                    if y_differential[-1]==0.1:
-                        y_differential[-1] = y_differential[-2]
-                    y_differential.append(y_differential[0])
-
-                    y_differential = [-1 if y_differential[ind] < 0 else 1
-                                      for ind in range(len(y_differential))]
-                    y_differential = self.return_it_in_two_groups(y_differential)
-                    y_differential = np.array(y_differential)
-
-                    con_scaled = con_ind*1
-                    con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential
-                    con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8
-                    con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8
-
-                    try:
-                        con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5
-                        con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5
-                    except:
-                        pass
-
-                    con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8
-                    con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8
-
-                    try:
-                        con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5
-                        con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5
-                    except:
-                        pass
-
-                else:
-                    y_biger_than_x = np.abs(y_differential) > np.abs(x_differential)
-                    mult = y_biger_than_x*y_differential
-
-                    arg_min_mult = np.argmin(mult)
-                    arg_max_mult = np.argmax(mult)
-
-                    if x_differential[0]==0:
-                        x_differential[0] = 0.1
-                    if x_differential[-1]==0:
-                        x_differential[-1]= 0.1
-                    x_differential = [x_differential[ind] if x_differential[ind] != 0
-                                      else 0.5 * (x_differential[ind-1] + x_differential[ind+1])
-                                      for ind in range(len(x_differential))]
-
-                    if x_differential[0]==0.1:
-                        x_differential[0] = x_differential[1]
-                    if x_differential[-1]==0.1:
-                        x_differential[-1] = x_differential[-2]
-                    x_differential.append(x_differential[0])
-
-                    x_differential = [-1 if x_differential[ind] < 0 else 1
-                                      for ind in range(len(x_differential))]
-                    x_differential = self.return_it_in_two_groups(x_differential)
-                    x_differential = np.array(x_differential)
-
-                    con_scaled = con_ind*1
-                    con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential
-                    con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8
-                    con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8
-
-                    try:
-                        con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5
-                        con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5
-                    except:
-                        pass
-
-                    con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8
-                    con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8
-
-                    try:
-                        con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5
-                        con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5
-                    except:
-                        pass
-
-                con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0
-                con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0
-
-                all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1]
-                all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0]
-
-        return all_found_textline_polygons
-
     def delete_regions_without_textlines(
             self, slopes, all_found_textline_polygons, boxes_text, txt_con_org,
             contours_only_text_parent, index_by_text_par_con):
@@ -4130,8 +3905,7 @@ class Eynollah:
 
             all_found_textline_polygons=[ all_found_textline_polygons ]
 
-            all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
-                all_found_textline_polygons)
+            all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
             all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                 all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline")
 
@@ -4255,14 +4029,14 @@ class Eynollah:
                 boxes, boxes_d, polygons_of_marginals, contours_tables = \
                 self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                               num_col_classifier, table_prediction, erosion_hurts)
-            ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
+            ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
         else:
             polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
                 regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
                 self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                            num_col_classifier, img_only_regions, table_prediction, erosion_hurts,
                                            img_bin_light if self.light_version else None)
-            ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
+            ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
             if self.light_version:
                 drop_label_in_full_layout = 4
                 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0
@@ -4398,15 +4172,14 @@ class Eynollah:
 
         #print("text region early 3 in %.1fs", time.time() - t0)
         if self.light_version:
-            contours_only_text_parent = self.dilate_textregions_contours(
-                contours_only_text_parent)
+            contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
             contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one(
                 contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals)
             #print("text region early 3.5 in %.1fs", time.time() - t0)
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
                 contours_only_text_parent, self.image, confidence_matrix)
-            #txt_con_org = self.dilate_textregions_contours(txt_con_org)
-            #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
+            #txt_con_org = dilate_textregion_contours(txt_con_org)
+            #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
         else:
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
                 contours_only_text_parent, self.image, confidence_matrix)
@@ -4433,14 +4206,10 @@ class Eynollah:
                     #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \
                     #    self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals,
                     #        boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
-                    #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
-                    #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
-                    all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
-                        all_found_textline_polygons)
+                    all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
                     all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                         all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline")
-                    all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(
-                        all_found_textline_polygons_marginals)
+                    all_found_textline_polygons_marginals = dilate_textline_contours(all_found_textline_polygons_marginals)
                     contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \
                         index_by_text_par_con = self.filter_contours_without_textline_inside(
                             contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions)
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 03d45b7..f228e53 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -328,6 +328,17 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
 
     return img_ret[:, :, 0]
 
+def dilate_textline_contours(self, all_found_textline_polygons):
+    return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
+                      dtype=np.uint)[:, np.newaxis]
+             for poly in region]
+            for region in all_found_textline_polygons]
+
+def dilate_textregion_contours(self, all_found_textline_polygons):
+    return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
+                     dtype=np.uint)[:, np.newaxis]
+            for poly in all_found_textline_polygons]
+
 def make_valid(polygon: Polygon) -> Polygon:
     """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
     def isint(x):

From f3faa29528ce7acdafa0c02fc2a9ec4732d91e4a Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 21 Aug 2025 12:59:03 +0200
Subject: [PATCH 11/41] refactor shapely conversions into contour2polygon /
 polygon2contour, also handle heterogeneous geometries

---
 src/eynollah/eynollah.py      |   1 -
 src/eynollah/utils/contour.py | 107 ++++++++++++++++++++++++++--------
 2 files changed, 83 insertions(+), 25 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index fe233cb..54ace30 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -27,7 +27,6 @@ from loky import ProcessPoolExecutor
 import xml.etree.ElementTree as ET
 import cv2
 import numpy as np
-from shapely.geometry import Polygon
 from scipy.signal import find_peaks
 from scipy.ndimage import gaussian_filter1d
 from numba import cuda
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index f228e53..1123241 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -1,7 +1,15 @@
+from typing import Sequence, Union
+from numbers import Number
 from functools import partial
+import itertools
+
 import cv2
 import numpy as np
-from shapely.geometry import Polygon
+from scipy.sparse.csgraph import minimum_spanning_tree
+from shapely.geometry import Polygon, LineString
+from shapely.geometry.polygon import orient
+from shapely import set_precision
+from shapely.ops import unary_union, nearest_points
 
 from .rotate import rotate_image, rotation_image_new
 
@@ -37,29 +45,28 @@ def get_text_region_boxes_by_given_contours(contours):
 
     return boxes, contours_new
 
-def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area):
+def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
     found_polygons_early = []
-    for jv,c in enumerate(contours):
-        if len(c) < 3:  # A polygon cannot have less than 3 points
+    for jv, contour in enumerate(contours):
+        if len(contour) < 3:  # A polygon cannot have less than 3 points
             continue
 
-        polygon = Polygon([point[0] for point in c])
+        polygon = contour2polygon(contour, dilate=dilate)
         area = polygon.area
         if (area >= min_area * np.prod(image.shape[:2]) and
             area <= max_area * np.prod(image.shape[:2]) and
             hierarchy[0][jv][3] == -1):
-            found_polygons_early.append(np.array(make_valid(polygon).exterior.coords[:-1],
-                                                 dtype=np.uint)[:, np.newaxis])
+            found_polygons_early.append(polygon2contour(polygon))
     return found_polygons_early
 
-def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
+def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
     found_polygons_early = []
-    for jv,c in enumerate(contours):
-        if len(c) < 3:  # A polygon cannot have less than 3 points
+    for jv, contour in enumerate(contours):
+        if len(contour) < 3:  # A polygon cannot have less than 3 points
             continue
 
-        polygon = Polygon([point[0] for point in c])
-        # area = cv2.contourArea(c)
+        polygon = contour2polygon(contour, dilate=dilate)
+        # area = cv2.contourArea(contour)
         area = polygon.area
         ##print(np.prod(thresh.shape[:2]))
         # Check that polygon has area greater than minimal area
@@ -68,9 +75,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
             area <= max_area * np.prod(image.shape[:2]) and
             # hierarchy[0][jv][3]==-1
             True):
-            # print(c[0][0][1])
-            found_polygons_early.append(np.array(make_valid(polygon).exterior.coords[:-1],
-                                                 dtype=np.uint)[:, np.newaxis])
+            # print(contour[0][0][1])
+            found_polygons_early.append(polygon2contour(polygon))
     return found_polygons_early
 
 def find_new_features_of_contours(contours_main):
@@ -328,16 +334,29 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
 
     return img_ret[:, :, 0]
 
-def dilate_textline_contours(self, all_found_textline_polygons):
-    return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
-                      dtype=np.uint)[:, np.newaxis]
-             for poly in region]
+def dilate_textline_contours(all_found_textline_polygons):
+    return [[polygon2contour(contour2polygon(contour, dilate=5))
+             for contour in region]
             for region in all_found_textline_polygons]
 
-def dilate_textregion_contours(self, all_found_textline_polygons):
-    return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1],
-                     dtype=np.uint)[:, np.newaxis]
-            for poly in all_found_textline_polygons]
+def dilate_textregion_contours(all_found_textline_polygons):
+    return [polygon2contour(contour2polygon(contour, dilate=5))
+            for contour in all_found_textline_polygons]
+
+def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
+    polygon = Polygon([point[0] for point in contour])
+    if dilate:
+        polygon = polygon.buffer(dilate)
+    if polygon.geom_type == 'GeometryCollection':
+        # heterogeneous result: filter zero-area shapes (LineString, Point)
+        polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0])
+    if polygon.geom_type == 'MultiPolygon':
+        # homogeneous result: construct convex hull to connect
+        polygon = join_polygons(polygon.geoms)
+    return make_valid(polygon)
+
+def polygon2contour(polygon: Polygon) -> np.ndarray:
+    return np.array(polygon.exterior.coords[:-1], dtype=np.uint)[:, np.newaxis]
 
 def make_valid(polygon: Polygon) -> Polygon:
     """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
@@ -346,7 +365,7 @@ def make_valid(polygon: Polygon) -> Polygon:
     # make sure rounding does not invalidate
     if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0:
         polygon = Polygon(np.round(polygon.exterior.coords))
-    points = list(polygon.exterior.coords)
+    points = list(polygon.exterior.coords[:-1])
     # try by re-arranging points
     for split in range(1, len(points)):
         if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
@@ -368,3 +387,43 @@ def make_valid(polygon: Polygon) -> Polygon:
         polygon = polygon.buffer(tolerance)
     assert polygon.is_valid, polygon.wkt
     return polygon
+
+def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon:
+    """construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
+    # ensure input polygons are simply typed and all oriented equally
+    polygons = [orient(poly)
+                for poly in itertools.chain.from_iterable(
+                        [poly.geoms
+                         if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
+                         else [poly]
+                         for poly in polygons])]
+    npoly = len(polygons)
+    if npoly == 1:
+        return polygons[0]
+    # find min-dist path through all polygons (travelling salesman)
+    pairs = itertools.combinations(range(npoly), 2)
+    dists = np.zeros((npoly, npoly), dtype=float)
+    for i, j in pairs:
+        dist = polygons[i].distance(polygons[j])
+        if dist < 1e-5:
+            dist = 1e-5 # if pair merely touches, we still need to get an edge
+        dists[i, j] = dist
+        dists[j, i] = dist
+    dists = minimum_spanning_tree(dists, overwrite=True)
+    # add bridge polygons (where necessary)
+    for prevp, nextp in zip(*dists.nonzero()):
+        prevp = polygons[prevp]
+        nextp = polygons[nextp]
+        nearest = nearest_points(prevp, nextp)
+        bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1)
+        polygons.append(bridgep)
+    jointp = unary_union(polygons)
+    assert jointp.geom_type == 'Polygon', jointp.wkt
+    # follow-up calculations will necessarily be integer;
+    # so anticipate rounding here and then ensure validity
+    jointp2 = set_precision(jointp, 1.0)
+    if jointp2.geom_type != 'Polygon' or not jointp2.is_valid:
+        jointp2 = Polygon(np.round(jointp.exterior.coords))
+        jointp2 = make_valid(jointp2)
+    assert jointp2.geom_type == 'Polygon', jointp2.wkt
+    return jointp2

From 7a9e8256ee8a4c777baa0bd972697cece3e269a5 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 21 Aug 2025 13:00:31 +0200
Subject: [PATCH 12/41] =?UTF-8?q?increase=20dilatation:=20textregions/line?=
 =?UTF-8?q?s=20(5=E2=86=926),=20seplines=20(0=E2=86=921)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/eynollah/eynollah.py      | 10 +++++-----
 src/eynollah/utils/contour.py |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 54ace30..8cb1d52 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -1714,8 +1714,8 @@ class Eynollah:
         mask_images_only=(prediction_regions_org[:,:] ==2)*1
 
         polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-        polygons_seplines = textline_con_fil = filter_contours_area_of_image(
-            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001)
+        polygons_seplines = filter_contours_area_of_image(
+            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
 
         polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
         polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
@@ -1909,7 +1909,7 @@ class Eynollah:
                 #plt.show()
 
             polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001)
+                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
 
             test_khat = np.zeros(prediction_regions_org.shape)
             test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1))
@@ -2022,7 +2022,7 @@ class Eynollah:
 
             polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
             polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001)
+                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
 
             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)
@@ -2071,7 +2071,7 @@ class Eynollah:
 
             polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
             polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001)
+                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
 
             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 1123241..c571be6 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -335,12 +335,12 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
     return img_ret[:, :, 0]
 
 def dilate_textline_contours(all_found_textline_polygons):
-    return [[polygon2contour(contour2polygon(contour, dilate=5))
+    return [[polygon2contour(contour2polygon(contour, dilate=6))
              for contour in region]
             for region in all_found_textline_polygons]
 
 def dilate_textregion_contours(all_found_textline_polygons):
-    return [polygon2contour(contour2polygon(contour, dilate=5))
+    return [polygon2contour(contour2polygon(contour, dilate=6))
             for contour in all_found_textline_polygons]
 
 def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):

From 11e143afee1f446bfef7c6b19ba720e5cddb981d Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Fri, 29 Aug 2025 12:16:56 +0200
Subject: [PATCH 13/41] polygon2contour: avoid overflow

---
 src/eynollah/utils/contour.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index c571be6..2cd7080 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -356,7 +356,8 @@ def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number
     return make_valid(polygon)
 
 def polygon2contour(polygon: Polygon) -> np.ndarray:
-    return np.array(polygon.exterior.coords[:-1], dtype=np.uint)[:, np.newaxis]
+    polygon = np.array(polygon.exterior.coords[:-1], dtype=int)
+    return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis]
 
 def make_valid(polygon: Polygon) -> Polygon:
     """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""

From 235539a35071559f8929bfcda9cb47d506c23d58 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Fri, 29 Aug 2025 12:19:37 +0200
Subject: [PATCH 14/41] filter_contours_without_textline_inside: avoid removing
 from identical lists twice

---
 src/eynollah/eynollah.py | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 8cb1d52..b636b09 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -3764,7 +3764,9 @@ class Eynollah:
             return contours
 
     def filter_contours_without_textline_inside(
-            self, contours,text_con_org,  contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions):
+            self, contours, text_con_org, contours_textline,
+            contours_only_text_parent_d_ordered,
+            conf_contours_textregions):
         ###contours_txtline_of_all_textregions = []
         ###for jj in range(len(contours_textline)):
             ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj]
@@ -3788,23 +3790,23 @@ class Eynollah:
             ###if np.any(results==1):
                 ###contours_with_textline.append(con_tr)
 
-        textregion_index_to_del = []
+        textregion_index_to_del = set()
         for index_textregion, textlines_textregion in enumerate(contours_textline):
-            if len(textlines_textregion)==0:
-                textregion_index_to_del.append(index_textregion)
+            if len(textlines_textregion) == 0:
+                textregion_index_to_del.add(index_textregion)
+        def filterfun(lis):
+            if len(lis) == 0:
+                return []
+            if len(textregion_index_to_del) == 0:
+                return lis
+            return list(np.delete(lis, list(textregion_index_to_del)))
 
-        uniqe_args_trs = np.unique(textregion_index_to_del)
-        uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1]
-
-        for ind_u_a_trs in uniqe_args_trs_sorted:
-            conf_contours_textregions.pop(ind_u_a_trs)
-            contours.pop(ind_u_a_trs)
-            contours_textline.pop(ind_u_a_trs)
-            text_con_org.pop(ind_u_a_trs)
-            if len(contours_only_text_parent_d_ordered) > 0:
-                contours_only_text_parent_d_ordered.pop(ind_u_a_trs)
-
-        return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours)))
+        return (filterfun(contours),
+                filterfun(text_con_org),
+                filterfun(conf_contours_textregions),
+                filterfun(contours_textline),
+                filterfun(contours_only_text_parent_d_ordered),
+                np.arange(len(contours) - len(textregion_index_to_del)))
 
     def delete_regions_without_textlines(
             self, slopes, all_found_textline_polygons, boxes_text, txt_con_org,

From bca2ae3d78fcc6536c5365c9b93a0143ebbbf658 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Fri, 29 Aug 2025 12:37:44 +0200
Subject: [PATCH 15/41] get_marginals: exit early if no peaks found to avoid
 spurious overlap mask

---
 src/eynollah/utils/marginals.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py
index a29e50d..22ada4e 100644
--- a/src/eynollah/utils/marginals.py
+++ b/src/eynollah/utils/marginals.py
@@ -94,6 +94,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
         except:
             point_left=first_nonzero
 
+        if point_left == first_nonzero and point_right == last_nonzero:
+            return text_regions
 
 
         if point_right>=mask_marginals.shape[1]:

From 9b5182c1c07ebbdb65ea81978f9c667917b82743 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 26 Aug 2025 21:00:33 +0200
Subject: [PATCH 16/41] utils: introduce box2rect and box2slice

---
 src/eynollah/utils/__init__.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py
index c479744..bbf30a8 100644
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -300,9 +300,17 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
             x_end_with_child_without_mother,
             new_main_sep_y)
 
+def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
+    return (box[1], box[1] + box[3],
+            box[0], box[0] + box[2])
+
+def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]:
+    return (slice(box[1], box[1] + box[3]),
+            slice(box[0], box[0] + box[2]))
+
 def crop_image_inside_box(box, img_org_copy):
-    image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
-    return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
+    image_box = img_org_copy[box2slice(box)]
+    return image_box, box2rect(box)
 
 def otsu_copy_binary(img):
     img_r = np.zeros((img.shape[0], img.shape[1], 3))

From 5bff2d156ab32b72470b547870874da3053a3d7b Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 26 Aug 2025 21:02:43 +0200
Subject: [PATCH 17/41] use box2rect instead of crop_image_inside_box when no
 image needed

---
 src/eynollah/eynollah.py             | 8 +++++---
 src/eynollah/utils/separate_lines.py | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index b636b09..6847c1f 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -98,6 +98,8 @@ from .utils.resize import resize_image
 from .utils import (
     boosting_headers_by_longshot_region_segmentation,
     crop_image_inside_box,
+    box2rect,
+    box2slice,
     find_num_col,
     otsu_copy_binary,
     put_drop_out_from_only_drop_model,
@@ -1542,7 +1544,7 @@ class Eynollah:
             all_found_textline_polygons.append(textlines_ins[::-1])
             slopes.append(slope_deskew)
 
-            _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated)
+            crop_coor = box2rect(boxes[index])
             all_box_coord.append(crop_coor)
 
         return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes
@@ -1754,7 +1756,7 @@ class Eynollah:
                 ##polygons_of_images_fin.append(ploy_img_ind)
 
                 box = cv2.boundingRect(ploy_img_ind)
-                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
+                page_coord_img = box2rect(box)
                 # cont_page.append(np.array([[page_coord[2], page_coord[0]],
                 #                            [page_coord[3], page_coord[0]],
                 #                            [page_coord[3], page_coord[1]],
@@ -1768,7 +1770,7 @@ class Eynollah:
             if h < 150 or w < 150:
                 pass
             else:
-                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
+                page_coord_img = box2rect(box)
                 # cont_page.append(np.array([[page_coord[2], page_coord[0]],
                 #                            [page_coord[3], page_coord[0]],
                 #                            [page_coord[3], page_coord[1]],
diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index ffbfff7..b1a90b5 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -18,6 +18,8 @@ from .contour import (
 from . import (
     find_num_col_deskew,
     crop_image_inside_box,
+    box2rect,
+    box2slice,
 )
 
 def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@@ -1540,7 +1542,7 @@ def do_work_of_slopes_new(
     logger.debug('enter do_work_of_slopes_new')
 
     x, y, w, h = box_text
-    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
+    crop_coor = box2rect(box_text)
     mask_textline = np.zeros(textline_mask_tot_ea.shape)
     mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
     all_text_region_raw = textline_mask_tot_ea * mask_textline
@@ -1631,7 +1633,7 @@ def do_work_of_slopes_new_curved(
             slope_for_all = slope_deskew
         slope = slope_for_all
 
-    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
+    crop_coor = box2rect(box_text)
 
     if abs(slope_for_all) < 45:
         textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
@@ -1685,7 +1687,7 @@ def do_work_of_slopes_new_light(
     logger.debug('enter do_work_of_slopes_new_light')
 
     x, y, w, h = box_text
-    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
+    crop_coor = box2rect(box_text)
     mask_textline = np.zeros(textline_mask_tot_ea.shape)
     mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
     all_text_region_raw = textline_mask_tot_ea * mask_textline

From 5b16c2fc0066f3e1542dfdf7a1fe9f9241401c38 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 26 Aug 2025 21:05:40 +0200
Subject: [PATCH 18/41] avoid pulling unused 'image_page_rotated' through
 functions

---
 src/eynollah/eynollah.py             | 48 +++++++++++++---------------
 src/eynollah/utils/separate_lines.py |  6 ++--
 2 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 6847c1f..8f66af5 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -1521,7 +1521,7 @@ class Eynollah:
         self.logger.debug("exit extract_text_regions")
         return prediction_regions, prediction_regions2
 
-    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
+    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
 
         polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001)
         M_main_tot = [cv2.moments(polygons_of_textlines[j])
@@ -1549,13 +1549,12 @@ class Eynollah:
 
         return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes
 
-    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
+    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_light")
         results = self.executor.map(partial(do_work_of_slopes_new_light,
                                             textline_mask_tot_ea=textline_mask_tot,
-                                            image_page_rotated=image_page_rotated,
                                             slope_deskew=slope_deskew,textline_light=self.textline_light,
                                             logger=self.logger,),
                                     boxes, contours, contours_par, range(len(contours_par)))
@@ -1563,13 +1562,12 @@ class Eynollah:
         self.logger.debug("exit get_slopes_and_deskew_new_light")
         return tuple(zip(*results))
 
-    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
+    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new")
         results = self.executor.map(partial(do_work_of_slopes_new,
                                             textline_mask_tot_ea=textline_mask_tot,
-                                            image_page_rotated=image_page_rotated,
                                             slope_deskew=slope_deskew,
                                             MAX_SLOPE=MAX_SLOPE,
                                             KERNEL=KERNEL,
@@ -1580,13 +1578,12 @@ class Eynollah:
         self.logger.debug("exit get_slopes_and_deskew_new")
         return tuple(zip(*results))
 
-    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
+    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_curved")
         results = self.executor.map(partial(do_work_of_slopes_new_curved,
                                             textline_mask_tot_ea=textline_mask_tot,
-                                            image_page_rotated=image_page_rotated,
                                             mask_texts_only=mask_texts_only,
                                             num_col=num_col,
                                             scale_par=scale_par,
@@ -2935,10 +2932,10 @@ class Eynollah:
         return slope_deskew
 
     def run_marginals(
-            self, image_page, textline_mask_tot_ea, mask_images, mask_lines,
+            self, textline_mask_tot_ea, mask_images, mask_lines,
             num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):
 
-        image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :]
+        textline_mask_tot = textline_mask_tot_ea[:, :]
         textline_mask_tot[mask_images[:, :] == 1] = 0
 
         text_regions_p_1[mask_lines[:, :] == 1] = 3
@@ -2957,10 +2954,7 @@ class Eynollah:
             except Exception as e:
                 self.logger.error("exception %s", e)
 
-        if self.plotter:
-            self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
-            self.plotter.save_plot_of_layout_main(text_regions_p, image_page)
-        return textline_mask_tot, text_regions_p, image_page_rotated
+        return textline_mask_tot, text_regions_p
 
     def run_boxes_no_full_layout(
             self, image_page, textline_mask_tot, text_regions_p,
@@ -3112,7 +3106,7 @@ class Eynollah:
                 text_regions_p[:,:][table_prediction[:,:]==1] = 10
                 img_revised_tab = text_regions_p[:,:]
                 if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
+                    _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
                         rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
 
                     text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
@@ -3132,7 +3126,7 @@ class Eynollah:
 
             else:
                 if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
+                    _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
                         rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
 
                     text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
@@ -4010,9 +4004,12 @@ class Eynollah:
             text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new )
             table_prediction = resize_image(table_prediction,img_h_new, img_w_new )
 
-        textline_mask_tot, text_regions_p, image_page_rotated = \
-            self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines,
+        textline_mask_tot, text_regions_p = \
+            self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines,
                                num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
+        if self.plotter:
+            self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
+            self.plotter.save_plot_of_layout_main(text_regions_p, image_page)
 
         if self.light_version and num_col_classifier in (1,2):
             image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
@@ -4021,7 +4018,6 @@ class Eynollah:
             textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m )
             text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m )
             table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
-            image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m )
 
         self.logger.info("detection of marginals took %.1fs", time.time() - t1)
         #print("text region early 2 marginal in %.1fs", time.time() - t0)
@@ -4197,11 +4193,11 @@ class Eynollah:
                     all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                         all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2(
                             txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org,
-                            image_page_rotated, boxes_text, slope_deskew)
+                            boxes_text, slope_deskew)
                     all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                         all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2(
                             polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org,
-                            image_page_rotated, boxes_marginals, slope_deskew)
+                            boxes_marginals, slope_deskew)
 
                     #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
                     #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons,
@@ -4221,11 +4217,11 @@ class Eynollah:
                     all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \
                         index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light(
                             txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
-                            image_page_rotated, boxes_text, slope_deskew)
+                            boxes_text, slope_deskew)
                     all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                         all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light(
                             polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
-                            image_page_rotated, boxes_marginals, slope_deskew)
+                            boxes_marginals, slope_deskew)
                     #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                     #    all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
             else:
@@ -4233,25 +4229,25 @@ class Eynollah:
                 all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                     all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new(
                         txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
-                        image_page_rotated, boxes_text, slope_deskew)
+                        boxes_text, slope_deskew)
                 all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                     all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new(
                         polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
-                        image_page_rotated, boxes_marginals, slope_deskew)
+                        boxes_marginals, slope_deskew)
         else:
             scale_param = 1
             textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
             all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                 all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(
                     txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode,
-                    image_page_rotated, boxes_text, text_only,
+                    boxes_text, text_only,
                     num_col_classifier, scale_param, slope_deskew)
             all_found_textline_polygons = small_textlines_to_parent_adherence2(
                 all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
             all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                 all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(
                     polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode,
-                    image_page_rotated, boxes_marginals, text_only,
+                    boxes_marginals, text_only,
                     num_col_classifier, scale_param, slope_deskew)
             all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(
                 all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index b1a90b5..dcddc65 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -1532,7 +1532,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
 
 def do_work_of_slopes_new(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, image_page_rotated, slope_deskew,
+        textline_mask_tot_ea, slope_deskew,
         logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
 ):
     if KERNEL is None:
@@ -1590,7 +1590,7 @@ def do_work_of_slopes_new(
 
 def do_work_of_slopes_new_curved(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew,
+        textline_mask_tot_ea, mask_texts_only, num_col, scale_par, slope_deskew,
         logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
 ):
     if KERNEL is None:
@@ -1679,7 +1679,7 @@ def do_work_of_slopes_new_curved(
 
 def do_work_of_slopes_new_light(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light,
+        textline_mask_tot_ea, slope_deskew, textline_light,
         logger=None
 ):
     if logger is None:

From 4337d6298596b1272c35b909a0ec0ee50adc4ba2 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 26 Aug 2025 21:06:36 +0200
Subject: [PATCH 19/41] =?UTF-8?q?contours:=20rename=20'pixel'=20=E2=86=92?=
 =?UTF-8?q?=20'label'=20for=20clarity?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/eynollah/utils/contour.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 2cd7080..0700ed4 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -141,12 +141,12 @@ def return_parent_contours(contours, hierarchy):
                        if hierarchy[0][i][3] == -1]
     return contours_parent
 
-def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
+def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
     # pixels of images are identified by 5
     if len(region_pre_p.shape) == 3:
-        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
+        cnts_images = (region_pre_p[:, :, 0] == label) * 1
     else:
-        cnts_images = (region_pre_p[:, :] == pixel) * 1
+        cnts_images = (region_pre_p[:, :] == label) * 1
     cnts_images = cnts_images.astype(np.uint8)
     cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
     imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -267,12 +267,12 @@ def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
         confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
     return cnts, confs
 
-def return_contours_of_interested_textline(region_pre_p, pixel):
+def return_contours_of_interested_textline(region_pre_p, label):
     # pixels of images are identified by 5
     if len(region_pre_p.shape) == 3:
-        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
+        cnts_images = (region_pre_p[:, :, 0] == label) * 1
     else:
-        cnts_images = (region_pre_p[:, :] == pixel) * 1
+        cnts_images = (region_pre_p[:, :] == label) * 1
     cnts_images = cnts_images.astype(np.uint8)
     cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
     imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -295,12 +295,12 @@ def return_contours_of_image(image):
     contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
     return contours, hierarchy
 
-def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
+def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003):
     # pixels of images are identified by 5
     if len(region_pre_p.shape) == 3:
-        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
+        cnts_images = (region_pre_p[:, :, 0] == label) * 1
     else:
-        cnts_images = (region_pre_p[:, :] == pixel) * 1
+        cnts_images = (region_pre_p[:, :] == label) * 1
     cnts_images = cnts_images.astype(np.uint8)
     cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
     imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -313,12 +313,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si
 
     return contours_imgs
 
-def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
+def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area):
     # pixels of images are identified by 5
     if len(region_pre_p.shape) == 3:
-        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
+        cnts_images = (region_pre_p[:, :, 0] == label) * 1
     else:
-        cnts_images = (region_pre_p[:, :] == pixel) * 1
+        cnts_images = (region_pre_p[:, :] == label) * 1
     cnts_images = cnts_images.astype(np.uint8)
     cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
     imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)

From f458e3ece01aa7142c77b930dbdf1843c6835d85 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 26 Aug 2025 21:07:18 +0200
Subject: [PATCH 20/41] writer: SeparatorRegion needs SeparatorRegionType (not
 ImageRegionType)

---
 src/eynollah/writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py
index 92e353f..01c86de 100644
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@@ -296,7 +296,7 @@ class EynollahXmlWriter():
             page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
 
         for mm in range(len(polygons_lines_to_be_written_in_xml)):
-            page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
+            page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
 
         for mm in range(len(found_polygons_tables)):
             page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))

From dc0caad512219a2e08da3841c215167eed1526bb Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 26 Aug 2025 21:07:50 +0200
Subject: [PATCH 21/41] writer: use @type='heading' instead of 'header'

---
 src/eynollah/writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py
index 01c86de..b9e906a 100644
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@@ -268,7 +268,7 @@ class EynollahXmlWriter():
 
         self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
         for mm in range(len(found_polygons_text_region_h)):
-            textregion = TextRegionType(id=counter.next_region_id, type_='header',
+            textregion = TextRegionType(id=counter.next_region_id, type_='heading',
                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
             page.add_TextRegion(textregion)
 

From abf5c0f845255f247ce4991d18a5b3b8a3808f4e Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 2 Sep 2025 15:01:52 +0200
Subject: [PATCH 22/41] get_smallest_skew: when shifting search range of
 rotation angle, compare resulting (maximum) variances instead of blindly
 assuming the new range is better

---
 src/eynollah/utils/separate_lines.py | 32 +++++++++++++++++-----------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index dcddc65..3363367 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -1486,33 +1486,36 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
 
     if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
         angles = np.array([-45, 0, 45, 90,])
-        angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
+        angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
 
         angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
-        angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
+        angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
     elif main_page:
         angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
-        angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
+        angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
 
         early_slope_edge=11
         if abs(angle) > early_slope_edge:
             if angle < 0:
-                angles = np.linspace(-90, -12, n_tot_angles)
+                angles2 = np.linspace(-90, -12, n_tot_angles)
             else:
-                angles = np.linspace(90, 12, n_tot_angles)
-            angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
+                angles2 = np.linspace(90, 12, n_tot_angles)
+            angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
+            if var2 > var:
+                angle = angle2
     else:
         angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
-        angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
+        angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
 
         early_slope_edge=22
         if abs(angle) > early_slope_edge:
             if angle < 0:
-                angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
+                angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
             else:
-                angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
-            angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
-
+                angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
+            angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
+            if var2 > var:
+                angle = angle2
     return angle
 
 def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map):
@@ -1524,11 +1527,14 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
     try:
         var_res = np.array(results)
         assert var_res.any()
-        angle = angles[np.argmax(var_res)]
+        idx = np.argmax(var_res)
+        angle = angles[idx]
+        var = var_res[idx]
     except:
         logger.exception("cannot determine best angle among %s", str(angles))
         angle = 0
-    return angle
+        var = 0
+    return angle, var
 
 def do_work_of_slopes_new(
         box_text, contour, contour_par, index_r_con,

From 8be2c7977101080856e4d6e43660a0de055b86c9 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Sep 2025 09:01:18 +0200
Subject: [PATCH 23/41] Revert "deskewing with faster multiprocessing"

This reverts commit 5db3e9fa64d39c128bd9bee27c9d0fb73b3459d2.
---
 src/eynollah/eynollah.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 8f66af5..b450b17 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -2926,6 +2926,7 @@ class Eynollah:
         #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew')
         slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True,
                                           map=self.executor.map, logger=self.logger, plotter=self.plotter)
+
         if self.plotter:
             self.plotter.save_deskewed_image(slope_deskew)
         self.logger.info("slope_deskew: %.2f°", slope_deskew)

From 31f240c3b8a6eaa034b5ae02cf009930e8275725 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 2 Sep 2025 15:04:04 +0200
Subject: [PATCH 24/41] do_image_rotation, do_work_of_slopes_new_curved: pass
 arrays via shared memory

---
 src/eynollah/eynollah.py             | 12 +++++---
 src/eynollah/utils/separate_lines.py | 12 ++++++--
 src/eynollah/utils/shm.py            | 45 ++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 7 deletions(-)
 create mode 100644 src/eynollah/utils/shm.py

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index b450b17..42af8e4 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -95,6 +95,7 @@ from .utils.drop_capitals import (
 )
 from .utils.marginals import get_marginals
 from .utils.resize import resize_image
+from .utils.shm import share_ndarray
 from .utils import (
     boosting_headers_by_longshot_region_segmentation,
     crop_image_inside_box,
@@ -1582,9 +1583,11 @@ class Eynollah:
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_curved")
-        results = self.executor.map(partial(do_work_of_slopes_new_curved,
-                                            textline_mask_tot_ea=textline_mask_tot,
-                                            mask_texts_only=mask_texts_only,
+        with share_ndarray(textline_mask_tot) as textline_mask_tot_shared:
+            with share_ndarray(mask_texts_only) as mask_texts_only_shared:
+                results = self.executor.map(partial(do_work_of_slopes_new_curved,
+                                            textline_mask_tot_ea=textline_mask_tot_shared,
+                                            mask_texts_only=mask_texts_only_shared,
                                             num_col=num_col,
                                             scale_par=scale_par,
                                             slope_deskew=slope_deskew,
@@ -1593,7 +1596,8 @@ class Eynollah:
                                             logger=self.logger,
                                             plotter=self.plotter,),
                                     boxes, contours, contours_par, range(len(contours_par)))
-        #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
+                #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
+                results = list(results) # exhaust prior to release
         self.logger.debug("exit get_slopes_and_deskew_new_curved")
         return tuple(zip(*results))
 
diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index 3363367..e4bb953 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -15,6 +15,7 @@ from .contour import (
     return_contours_of_interested_textline,
     find_contours_mean_y_diff,
 )
+from .shm import share_ndarray, wrap_ndarray_shared
 from . import (
     find_num_col_deskew,
     crop_image_inside_box,
@@ -1454,7 +1455,8 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
 
     return img_patch_interest_revised
 
-def do_image_rotation(angle, img, sigma_des, logger=None):
+@wrap_ndarray_shared(kw='img')
+def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None):
     if logger is None:
         logger = getLogger(__package__)
     img_rot = rotate_image(img, angle)
@@ -1521,7 +1523,8 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
 def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map):
     if logger is None:
         logger = getLogger(__package__)
-    results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles))
+    with share_ndarray(img) as img_shared:
+        results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=logger), angles))
     if plotter:
         plotter.save_plot_of_rotation_angle(angles, results)
     try:
@@ -1594,9 +1597,12 @@ def do_work_of_slopes_new(
 
     return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope
 
+@wrap_ndarray_shared(kw='textline_mask_tot_ea')
+@wrap_ndarray_shared(kw='mask_texts_only')
 def do_work_of_slopes_new_curved(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, mask_texts_only, num_col, scale_par, slope_deskew,
+        textline_mask_tot_ea=None, mask_texts_only=None,
+        num_col=1, scale_par=1.0, slope_deskew=0.0,
         logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
 ):
     if KERNEL is None:
diff --git a/src/eynollah/utils/shm.py b/src/eynollah/utils/shm.py
new file mode 100644
index 0000000..4b51053
--- /dev/null
+++ b/src/eynollah/utils/shm.py
@@ -0,0 +1,45 @@
+from multiprocessing import shared_memory
+from contextlib import contextmanager
+from functools import wraps
+import numpy as np
+
+@contextmanager
+def share_ndarray(array: np.ndarray):
+    size = np.dtype(array.dtype).itemsize * np.prod(array.shape)
+    shm = shared_memory.SharedMemory(create=True, size=size)
+    try:
+        shared_array = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
+        shared_array[:] = array[:]
+        shared_array.flags["WRITEABLE"] = False
+        yield dict(shape=array.shape, dtype=array.dtype, name=shm.name)
+    finally:
+        shm.close()
+        shm.unlink()
+
+@contextmanager
+def ndarray_shared(array: dict):
+    shm = shared_memory.SharedMemory(name=array['name'])
+    try:
+        array = np.ndarray(array['shape'], dtype=array['dtype'], buffer=shm.buf)
+        yield array
+    finally:
+        shm.close()
+
+def wrap_ndarray_shared(kw=None):
+    def wrapper(f):
+        if kw is None:
+            @wraps(f)
+            def shared_func(array, *args, **kwargs):
+                with ndarray_shared(array) as ndarray:
+                    return f(ndarray, *args, **kwargs)
+            return shared_func
+        else:
+            @wraps(f)
+            def shared_func(*args, **kwargs):
+                array = kwargs.pop(kw)
+                with ndarray_shared(array) as ndarray:
+                    kwargs[kw] = ndarray
+                    return f(*args, **kwargs)
+            return shared_func
+    return wrapper
+

From 0662ece536e090989ad4e2281317336129eae468 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 4 Sep 2025 15:18:55 +0200
Subject: [PATCH 25/41] do_work_of_slopes*: use shm also in non-light mode(s)

---
 src/eynollah/eynollah.py             | 33 ++++++++++++++++------------
 src/eynollah/utils/separate_lines.py |  6 +++--
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 42af8e4..6333ca5 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -1554,11 +1554,14 @@ class Eynollah:
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_light")
-        results = self.executor.map(partial(do_work_of_slopes_new_light,
-                                            textline_mask_tot_ea=textline_mask_tot,
-                                            slope_deskew=slope_deskew,textline_light=self.textline_light,
-                                            logger=self.logger,),
-                                    boxes, contours, contours_par, range(len(contours_par)))
+        with share_ndarray(textline_mask_tot) as textline_mask_tot_shared:
+            results = self.executor.map(partial(do_work_of_slopes_new_light,
+                                                textline_mask_tot_ea=textline_mask_tot_shared,
+                                                slope_deskew=slope_deskew,
+                                                textline_light=self.textline_light,
+                                                logger=self.logger,),
+                                        boxes, contours, contours_par, range(len(contours_par)))
+            results = list(results) # exhaust prior to release
         #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
         self.logger.debug("exit get_slopes_and_deskew_new_light")
         return tuple(zip(*results))
@@ -1567,14 +1570,16 @@ class Eynollah:
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new")
-        results = self.executor.map(partial(do_work_of_slopes_new,
-                                            textline_mask_tot_ea=textline_mask_tot,
-                                            slope_deskew=slope_deskew,
-                                            MAX_SLOPE=MAX_SLOPE,
-                                            KERNEL=KERNEL,
-                                            logger=self.logger,
-                                            plotter=self.plotter,),
-                                    boxes, contours, contours_par, range(len(contours_par)))
+        with share_ndarray(textline_mask_tot) as textline_mask_tot_shared:
+            results = self.executor.map(partial(do_work_of_slopes_new,
+                                                textline_mask_tot_ea=textline_mask_tot_shared,
+                                                slope_deskew=slope_deskew,
+                                                MAX_SLOPE=MAX_SLOPE,
+                                                KERNEL=KERNEL,
+                                                logger=self.logger,
+                                                plotter=self.plotter,),
+                                        boxes, contours, contours_par, range(len(contours_par)))
+            results = list(results) # exhaust prior to release
         #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
         self.logger.debug("exit get_slopes_and_deskew_new")
         return tuple(zip(*results))
@@ -1596,8 +1601,8 @@ class Eynollah:
                                             logger=self.logger,
                                             plotter=self.plotter,),
                                     boxes, contours, contours_par, range(len(contours_par)))
-                #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
                 results = list(results) # exhaust prior to release
+        #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
         self.logger.debug("exit get_slopes_and_deskew_new_curved")
         return tuple(zip(*results))
 
diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index e4bb953..1a2f511 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -1539,9 +1539,10 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
         var = 0
     return angle, var
 
+@wrap_ndarray_shared(kw='textline_mask_tot_ea')
 def do_work_of_slopes_new(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, slope_deskew,
+        textline_mask_tot_ea=None, slope_deskew=0.0,
         logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
 ):
     if KERNEL is None:
@@ -1689,9 +1690,10 @@ def do_work_of_slopes_new_curved(
 
     return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
 
+@wrap_ndarray_shared(kw='textline_mask_tot_ea')
 def do_work_of_slopes_new_light(
         box_text, contour, contour_par, index_r_con,
-        textline_mask_tot_ea, slope_deskew, textline_light,
+        textline_mask_tot_ea=None, slope_deskew=0, textline_light=True,
         logger=None
 ):
     if logger is None:

From 04c3d7dd1b98b01adf2b8ccd72830ad5fd9a4e95 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 18 Sep 2025 20:07:54 +0200
Subject: [PATCH 26/41] get_smallest_skew: avoid shm if no ProcessPoolExecutor
 is passed

---
 src/eynollah/utils/separate_lines.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index 1a2f511..4d8badb 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -1469,7 +1469,7 @@ def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None):
     return var
 
 def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
-                       main_page=False, logger=None, plotter=None, map=map):
+                       main_page=False, logger=None, plotter=None, map=None):
     if main_page and plotter:
         plotter.save_plot_of_textline_density(img_patch_org)
 
@@ -1523,8 +1523,13 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
 def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map):
     if logger is None:
         logger = getLogger(__package__)
-    with share_ndarray(img) as img_shared:
-        results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=logger), angles))
+    if map is None:
+        results = [do_image_rotation.__wrapped__(angle, img=img, sigma_des=sigma_des, logger=logger)
+                   for angle in angles]
+    else:
+        with share_ndarray(img) as img_shared:
+            results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=logger),
+                               angles))
     if plotter:
         plotter.save_plot_of_rotation_angle(angles, results)
     try:

From b94c96fcbbb5bbce72bc9cdc9b334953abd774ad Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Sat, 20 Sep 2025 00:56:33 +0200
Subject: [PATCH 27/41] find_num_col: exit early if empty (avoiding exceptions)

---
 src/eynollah/utils/__init__.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py
index bbf30a8..9daec7d 100644
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -383,6 +383,10 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
     return np.std(z)
 
 def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
+    if not regions_without_separators.any():
+        return 0, []
+    #plt.imshow(regions_without_separators)
+    #plt.show()
     regions_without_separators_0 = regions_without_separators.sum(axis=0)
     ##plt.plot(regions_without_separators_0)
     ##plt.show()
@@ -402,6 +406,9 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
     zneg = gaussian_filter1d(zneg, sigma_)
 
     peaks_neg, _ = find_peaks(zneg, height=0)
+    #plt.plot(zneg)
+    #plt.plot(peaks_neg, zneg[peaks_neg], 'rx')
+    #plt.show()
     peaks, _ = find_peaks(z, height=0)
     peaks_neg = peaks_neg - 10 - 10
 
@@ -416,9 +423,13 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
                           (peaks_neg < (regions_without_separators.shape[1] - 370))]
     interest_pos = z[peaks]
     interest_pos = interest_pos[interest_pos > 10]
+    if not interest_pos.any():
+        return 0, []
     # plt.plot(z)
     # plt.show()
     interest_neg = z[peaks_neg]
+    if not interest_neg.any():
+        return 0, []
 
     min_peaks_pos = np.min(interest_pos)
     max_peaks_pos = np.max(interest_pos)

From 0366707136568241c42bac2f3bf675dda5989fe2 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Sat, 20 Sep 2025 00:57:00 +0200
Subject: [PATCH 28/41] get_smallest_skew: do not pass logger

---
 src/eynollah/utils/separate_lines.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index 4d8badb..1d27a17 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -1528,7 +1528,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
                    for angle in angles]
     else:
         with share_ndarray(img) as img_shared:
-            results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=logger),
+            results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=None),
                                angles))
     if plotter:
         plotter.save_plot_of_rotation_angle(angles, results)

From 758602403eb92625608d04e7d77fcbf896c55e2d Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Sun, 21 Sep 2025 21:35:22 +0200
Subject: [PATCH 29/41] replace loky with
 concurrent.futures.ProcessPoolExecutor (faster)

---
 src/eynollah/eynollah.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 6333ca5..1c70498 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -23,7 +23,7 @@ import gc
 import copy
 import json
 
-from loky import ProcessPoolExecutor
+from concurrent.futures import ProcessPoolExecutor
 import xml.etree.ElementTree as ET
 import cv2
 import numpy as np
@@ -244,7 +244,7 @@ class Eynollah:
             self.num_col_lower = num_col_lower
         self.logger = logger if logger else getLogger('eynollah')
         # for parallelization of CPU-intensive tasks:
-        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
+        self.executor = ProcessPoolExecutor(max_workers=cpu_count())
         atexit.register(self.executor.shutdown)
         self.dir_models = dir_models
         self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"

From c0137c29ad46adf2096664632e9a20a30afbfe09 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 02:23:43 +0200
Subject: [PATCH 30/41] try to fix the failed outsourcing of utils_ocr

---
 src/eynollah/eynollah.py        | 63 ++-------------------------------
 src/eynollah/utils/utils_ocr.py |  1 +
 2 files changed, 3 insertions(+), 61 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 32490a2..192f6f4 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -3917,34 +3917,6 @@ class Eynollah:
             region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
             return ordered, region_ids
 
-    def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes):
-        return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))]
-
-    def return_it_in_two_groups(self, x_differential):
-        split = [ind if x_differential[ind]!=x_differential[ind+1] else -1
-                 for ind in range(len(x_differential)-1)]
-        split_masked = list( np.array(split[:])[np.array(split[:])!=-1] )
-        if 0 not in split_masked:
-            split_masked.insert(0, -1)
-        split_masked.append(len(x_differential)-1)
-
-        split_masked = np.array(split_masked) +1
-
-        sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]])
-                for ind in range(len(split_masked)-1)]
-
-        indexes_to_bec_changed = [ind if (np.abs(sums[ind-1]) > np.abs(sums[ind]) and
-                                          np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1
-                                  for ind in range(1,len(sums)-1)]
-        indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1]
-
-        x_differential_new = np.copy(x_differential)
-        for i in indexes_to_bec_changed_filtered:
-            i_slice = slice(split_masked[i], split_masked[i+1])
-            x_differential_new[i_slice] = -1 * np.array(x_differential)[i_slice]
-
-        return x_differential_new
-
     def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot):
         width = np.shape(textline_image)[1]
         height = np.shape(textline_image)[0]
@@ -3988,36 +3960,6 @@ class Eynollah:
         else:
             pass
 
-    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot):
-        width = np.shape(textline_image)[1]
-        height = np.shape(textline_image)[0]
-        common_window = int(0.06*width)
-
-        width1 = int ( width/2. - common_window )
-        width2 = int ( width/2. + common_window )
-
-        img_sum = np.sum(textline_image[:,:,0], axis=0)
-        sum_smoothed = gaussian_filter1d(img_sum, 3)
-
-        peaks_real, _ = find_peaks(sum_smoothed, height=0)
-        if len(peaks_real)>70:
-            #print(len(peaks_real), 'len(peaks_real)')
-
-            peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
-
-            arg_max = np.argmax(sum_smoothed[peaks_real])
-            peaks_final = peaks_real[arg_max]
-
-            #plt.figure(ind_tot)
-            #plt.imshow(textline_image)
-            #plt.plot([peaks_final, peaks_final], [0, height-1])
-            ##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
-            #plt.savefig('./'+str(ind_tot)+'.png')
-
-            return peaks_final
-        else:
-            return None
-
     def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
             self, peaks_real, sum_smoothed, start_split, end_split):
 
@@ -4079,8 +4021,7 @@ class Eynollah:
             #width1 = int ( width/2. - common_window )
             #width2 = int ( width/2. + common_window )
 
-            split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(
-                textline_image, ind_tot)
+            split_point = return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image)
             if split_point:
                 image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height))
                 image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height))
@@ -5144,7 +5085,7 @@ class Eynollah:
                         box_ind = all_box_coord[indexing]
                         #print(ind_poly,np.shape(ind_poly), 'ind_poly')
                         #print(box_ind)
-                        ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind)
+                        ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind)
                         #print(ind_poly_copy)
                         ind_poly[ind_poly<0] = 0
                     x, y, w, h = cv2.boundingRect(ind_poly)
diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py
index 4fa99f7..5f19387 100644
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@@ -92,6 +92,7 @@ def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(t
         return peaks_final
     else:
         return None
+
 # Function to fit text inside the given area
 def fit_text_single_line(draw, text, font_path, max_width, max_height):
     initial_font_size = 50

From f857ee7b518e23c62b28aab32cd64d396da836fe Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Fri, 19 Sep 2025 02:12:18 +0200
Subject: [PATCH 31/41] simplify

---
 src/eynollah/eynollah.py       | 23 +++--------------------
 src/eynollah/utils/__init__.py |  2 +-
 2 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 192f6f4..0c9692e 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -3182,26 +3182,9 @@ class Eynollah:
             num_col = num_col + 1
             if not num_column_is_classified:
                 num_col_classifier = num_col + 1
-            if self.num_col_upper and self.num_col_lower:
-                if self.num_col_upper == self.num_col_lower:
-                    num_col_classifier = self.num_col_upper
-                else:
-                    if num_col_classifier < self.num_col_lower:
-                        num_col_classifier = self.num_col_lower
-                    if num_col_classifier > self.num_col_upper:
-                        num_col_classifier = self.num_col_upper
-                        
-            elif self.num_col_lower and not self.num_col_upper:
-                if num_col_classifier < self.num_col_lower:
-                    num_col_classifier = self.num_col_lower
-                    
-            elif self.num_col_upper and not self.num_col_lower:
-                if num_col_classifier > self.num_col_upper:
-                    num_col_classifier = self.num_col_upper
-                    
-            else:
-                pass
-                
+            num_col_classifier = min(self.num_col_upper or num_col_classifier,
+                                     max(self.num_col_lower or num_col_classifier,
+                                         num_col_classifier))
         except Exception as why:
             self.logger.error(why)
             num_col = None
diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py
index 243430e..f8926cf 100644
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -1675,9 +1675,9 @@ def return_boxes_of_images_by_order_of_reading_new(
                 peaks_neg_fin=[]
                 num_col = 0
             try:
-                peaks_neg_fin_org=np.copy(peaks_neg_fin)
                 if (len(peaks_neg_fin)+1)<num_col_classifier or num_col_classifier==6:
                     #print('burda')
+                    peaks_neg_fin_org = np.copy(peaks_neg_fin)
                     if len(peaks_neg_fin)==0:
                         num_col, peaks_neg_fin = find_num_col(
                             regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],

From 08c8c260285ffefd82bd01409b7c1dbac1993aaf Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 03:52:19 +0200
Subject: [PATCH 32/41] indent extremely long lines

---
 src/eynollah/eynollah.py             | 750 ++++++++++++++++++---------
 src/eynollah/utils/__init__.py       |  30 +-
 src/eynollah/utils/separate_lines.py | 136 +++--
 src/eynollah/utils/utils_ocr.py      |  25 +-
 4 files changed, 652 insertions(+), 289 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 0c9692e..2e31433 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -272,7 +272,6 @@ class Eynollah:
         else:
             self.threshold_art_class_textline = 0.1
             
-        self.dir_models = dir_models
         self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
         self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
         self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425"
@@ -289,8 +288,17 @@ class Eynollah:
         self.model_page_dir = dir_models + "/model_eynollah_page_extraction_20250915"
         self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
         self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
-        self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
-        self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_mb_ro_aug_ens_11"#"/model_step_3200000_mb_ro"#"/model_ens_reading_order_machine_based"#"/model_mb_ro_aug_ens_8"#"/model_ens_reading_order_machine_based"
+        self.model_region_dir_p_ens_light_only_images_extraction = (dir_models + 
+            "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
+        )
+        self.model_reading_order_dir = (dir_models + 
+            "/model_eynollah_reading_order_20250824"
+            #"/model_mb_ro_aug_ens_11"
+            #"/model_step_3200000_mb_ro"
+            #"/model_ens_reading_order_machine_based"
+            #"/model_mb_ro_aug_ens_8"
+            #"/model_ens_reading_order_machine_based"
+        )
         #"/modelens_12sp_elay_0_3_4__3_6_n"
         #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"
         #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"
@@ -379,11 +387,9 @@ class Eynollah:
                     self.b_s_ocr = 8
                 else:
                     self.b_s_ocr = int(batch_size_ocr)
-
                     
                 with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
                     characters = json.load(config_file)
-
                     
                 AUTOTUNE = tf.data.AUTOTUNE
 
@@ -840,7 +846,9 @@ class Eynollah:
             self, patches, img, model,
             n_batch_inference=1, marginal_of_patch_percent=0.1,
             thresholding_for_some_classes_in_light_version=False,
-            thresholding_for_artificial_class_in_light_version=False, thresholding_for_fl_light_version=False, threshold_art_class_textline=0.1):
+            thresholding_for_artificial_class_in_light_version=False,
+            thresholding_for_fl_light_version=False,
+            threshold_art_class_textline=0.1):
 
         self.logger.debug("enter do_prediction")
         img_height_model = model.layers[-1].output_shape[1]
@@ -1254,7 +1262,9 @@ class Eynollah:
             self, patches, img, model,
             n_batch_inference=1, marginal_of_patch_percent=0.1,
             thresholding_for_some_classes_in_light_version=False,
-            thresholding_for_artificial_class_in_light_version=False, threshold_art_class_textline=0.1, threshold_art_class_layout=0.1):
+            thresholding_for_artificial_class_in_light_version=False,
+            threshold_art_class_textline=0.1,
+            threshold_art_class_layout=0.1):
 
         self.logger.debug("enter do_prediction_new_concept")
         img_height_model = model.layers[-1].output_shape[1]
@@ -1384,7 +1394,8 @@ class Eynollah:
                     for i_batch, j_batch in zip(list_i_s, list_j_s):
                         seg_in = seg[indexer_inside_batch]
                         
-                        if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                        if (thresholding_for_artificial_class_in_light_version or
+                            thresholding_for_some_classes_in_light_version):
                             seg_in_art = seg_art[indexer_inside_batch]
 
                         index_y_u_in = list_y_u[indexer_inside_batch]
@@ -1404,7 +1415,8 @@ class Eynollah:
                                                 label_p_pred[0, 0:-margin or None,
                                                        0:-margin or None,
                                                        1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                                 index_x_d_in + 0:index_x_u_in - margin, 1] = \
                                                     seg_in_art[0:-margin or None,
@@ -1421,7 +1433,8 @@ class Eynollah:
                                                 label_p_pred[0, margin:,
                                                        margin:,
                                                        1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                                 index_x_d_in + margin:index_x_u_in - 0, 1] = \
                                                     seg_in_art[margin:,
@@ -1439,7 +1452,8 @@ class Eynollah:
                                                        0:-margin or None,
                                                        1]
                                             
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                                 index_x_d_in + 0:index_x_u_in - margin, 1] = \
                                                     seg_in_art[margin:,
@@ -1456,7 +1470,8 @@ class Eynollah:
                                                 label_p_pred[0, 0:-margin or None,
                                                        margin:,
                                                        1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                                 index_x_d_in + margin:index_x_u_in - 0, 1] = \
                                                     seg_in_art[0:-margin or None,
@@ -1473,7 +1488,8 @@ class Eynollah:
                                                 label_p_pred[0, margin:-margin or None,
                                                        0:-margin or None,
                                                        1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                                 index_x_d_in + 0:index_x_u_in - margin, 1] = \
                                                     seg_in_art[margin:-margin or None,
@@ -1489,7 +1505,8 @@ class Eynollah:
                                                 label_p_pred[0, margin:-margin or None,
                                                        margin:,
                                                        1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                                 index_x_d_in + margin:index_x_u_in - 0, 1] = \
                                                     seg_in_art[margin:-margin or None,
@@ -1505,7 +1522,8 @@ class Eynollah:
                                                 label_p_pred[0, 0:-margin or None,
                                                        margin:-margin or None,
                                                        1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                                 index_x_d_in + margin:index_x_u_in - margin, 1] = \
                                                     seg_in_art[0:-margin or None,
@@ -1521,7 +1539,8 @@ class Eynollah:
                                                 label_p_pred[0, margin:,
                                                        margin:-margin or None,
                                                        1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                                 index_x_d_in + margin:index_x_u_in - margin, 1] = \
                                                     seg_in_art[margin:,
@@ -1537,7 +1556,8 @@ class Eynollah:
                                                 label_p_pred[0, margin:-margin or None,
                                                        margin:-margin or None,
                                                        1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                                 index_x_d_in + margin:index_x_u_in - margin, 1] = \
                                                     seg_in_art[margin:-margin or None,
@@ -1686,7 +1706,10 @@ class Eynollah:
             else:
                 img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8)
 
-        prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3, thresholding_for_fl_light_version=thresholding_for_fl_light_version)
+        prediction_regions = self.do_prediction(patches, img, model_region,
+                                                marginal_of_patch_percent=0.1,
+                                                n_batch_inference=3,
+                                                thresholding_for_fl_light_version=thresholding_for_fl_light_version)
         prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
         self.logger.debug("exit extract_text_regions")
         return prediction_regions, prediction_regions
@@ -1839,7 +1862,10 @@ class Eynollah:
             cy_textline_in = [cy_main_tot[ind] for ind in indexes_in]
             w_h_textlines_in = [w_h_textlines[ind][0] / float(w_h_textlines[ind][1])  for ind in indexes_in]
 
-            textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins, cx_textline_in, cy_textline_in, w_h_textlines_in)
+            textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins,
+                                                                      cx_textline_in,
+                                                                      cy_textline_in,
+                                                                      w_h_textlines_in)
             
             all_found_textline_polygons.append(textlines_ins)#[::-1])
             slopes.append(slope_deskew)
@@ -1847,7 +1873,13 @@ class Eynollah:
             crop_coor = box2rect(boxes[index])
             all_box_coord.append(crop_coor)
 
-        return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes
+        return (all_found_textline_polygons,
+                boxes,
+                contours,
+                contours_par,
+                all_box_coord,
+                np.array(range(len(contours_par))),
+                slopes)
 
     def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
         if not len(contours):
@@ -1883,7 +1915,8 @@ class Eynollah:
         self.logger.debug("exit get_slopes_and_deskew_new")
         return tuple(zip(*results))
 
-    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
+    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes,
+                                         mask_texts_only, num_col, scale_par, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_curved")
@@ -1914,10 +1947,11 @@ class Eynollah:
         img_w = img_org.shape[1]
         img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w))
 
-        prediction_textline = self.do_prediction(
-            use_patches, img, self.model_textline,
-            marginal_of_patch_percent=0.15, n_batch_inference=3,
-            thresholding_for_artificial_class_in_light_version=self.textline_light, threshold_art_class_textline=self.threshold_art_class_textline)
+        prediction_textline = self.do_prediction(use_patches, img, self.model_textline,
+                                                 marginal_of_patch_percent=0.15,
+                                                 n_batch_inference=3,
+                                                 thresholding_for_artificial_class_in_light_version=self.textline_light,
+                                                 threshold_art_class_textline=self.threshold_art_class_textline)
         #if not self.textline_light:
             #if num_col_classifier==1:
                 #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline)
@@ -2009,12 +2043,14 @@ class Eynollah:
         boxes_sub_new = []
         poly_sub = []
         for mv in range(len(boxes_per_process)):
-            crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
+            crop_img, _ = crop_image_inside_box(boxes_per_process[mv],
+                                                np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
             crop_img = crop_img[:, :, 0]
             crop_img = cv2.erode(crop_img, KERNEL, iterations=2)
             try:
                 textline_con, hierarchy = return_contours_of_image(crop_img)
-                textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, max_area=1, min_area=0.0008)
+                textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy,
+                                                                 max_area=1, min_area=0.0008)
                 y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
                 sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
                 crop_img[crop_img > 0] = 1
@@ -2139,7 +2175,13 @@ class Eynollah:
                                                         [page_coord_img[2], page_coord_img[1]]]))
 
         self.logger.debug("exit get_regions_extract_images_only")
-        return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_images_fin, image_page, page_coord, cont_page
+        return (text_regions_p_true,
+                erosion_hurts,
+                polygons_seplines,
+                polygons_of_images_fin,
+                image_page,
+                page_coord,
+                cont_page)
 
     def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
         self.logger.debug("enter get_regions_light_v")
@@ -2197,7 +2239,8 @@ class Eynollah:
         #print("inside 1 ", time.time()-t_in)
 
         ###textline_mask_tot_ea = self.run_textline(img_bin)
-        self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized)))
+        self.logger.debug("detecting textlines on %s with %d colors",
+                          str(img_resized.shape), len(np.unique(img_resized)))
         textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier)
         textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h )
 
@@ -2214,13 +2257,15 @@ class Eynollah:
                                       img_resized.shape[1], img_resized.shape[0], num_col_classifier)
                     prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
                         True, img_resized, self.model_region_1_2, n_batch_inference=1,
-                        thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout)
+                        thresholding_for_some_classes_in_light_version=True,
+                        threshold_art_class_layout=self.threshold_art_class_layout)
                 else:
                     prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3))
                     confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
                     prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept(
                         False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1,
-                        thresholding_for_artificial_class_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout)
+                        thresholding_for_artificial_class_in_light_version=True,
+                        threshold_art_class_layout=self.threshold_art_class_layout)
                     ys = slice(*self.page_coord[0:2])
                     xs = slice(*self.page_coord[2:4])
                     prediction_regions_org[ys, xs] = prediction_regions_page
@@ -2233,8 +2278,11 @@ class Eynollah:
                                   img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier)
                 prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
                     True, img_resized, self.model_region_1_2, n_batch_inference=2,
-                    thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout)
-            ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
+                    thresholding_for_some_classes_in_light_version=True,
+                    threshold_art_class_layout=self.threshold_art_class_layout)
+            ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region,
+            ###n_batch_inference=3,
+            ###thresholding_for_some_classes_in_light_version=True)
             #print("inside 3 ", time.time()-t_in)
             #plt.imshow(prediction_regions_org[:,:,0])
             #plt.show()
@@ -2297,7 +2345,12 @@ class Eynollah:
             #plt.show()
             #print("inside 4 ", time.time()-t_in)
             self.logger.debug("exit get_regions_light_v")
-            return text_regions_p_true, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin, confidence_matrix
+            return (text_regions_p_true,
+                    erosion_hurts,
+                    polygons_seplines,
+                    textline_mask_tot_ea,
+                    img_bin,
+                    confidence_matrix)
         else:
             img_bin = resize_image(img_bin,img_height_h, img_width_h )
             self.logger.debug("exit get_regions_light_v")
@@ -2417,14 +2470,10 @@ class Eynollah:
             #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
 
             #prediction_regions_org = self.do_prediction(True, img, self.model_region)
-
             #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
-
             #prediction_regions_org = prediction_regions_org[:,:,0]
-
             #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
 
-
             mask_lines_only = (prediction_regions_org == 3)*1
             mask_texts_only = (prediction_regions_org == 1)*1
             mask_images_only= (prediction_regions_org == 2)*1
@@ -2843,7 +2892,8 @@ class Eynollah:
                     contours_new.append(contours_sep[ji])
                     if num_col_classifier>=2:
                         only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1]))
-                        only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], color=(1,1,1))
+                        only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,
+                                                                pts=[contours_sep[ji]], color=(1,1,1))
                         table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early
                         iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum()
                         #print(iou_in,'iou_in_in1')
@@ -2928,9 +2978,11 @@ class Eynollah:
             contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
 
             if indiv==pixel_table:
-                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = 0.001)
+                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy,
+                                                                     max_area=1, min_area=0.001)
             else:
-                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = min_area)
+                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy,
+                                                                     max_area=1, min_area=min_area)
 
             img_comm = cv2.fillPoly(img_comm, pts = main_contours, color = (indiv, indiv, indiv))
             img_comm = img_comm.astype(np.uint8)
@@ -2965,8 +3017,14 @@ class Eynollah:
                     y_min_main_line ,y_max_main_line=find_features_of_contours(contours_line)
                     y_min_main_tab ,y_max_main_tab=find_features_of_contours(contours_tab)
 
-                    cx_tab_m_text,cy_tab_m_text ,x_min_tab_m_text , x_max_tab_m_text, y_min_tab_m_text ,y_max_tab_m_text, _= find_new_features_of_contours(contours_table_m_text)
-                    cx_tabl,cy_tabl ,x_min_tabl , x_max_tabl, y_min_tabl ,y_max_tabl,_= find_new_features_of_contours(contours_tab)
+                    (cx_tab_m_text, cy_tab_m_text,
+                     x_min_tab_m_text, x_max_tab_m_text,
+                     y_min_tab_m_text, y_max_tab_m_text,
+                     _) = find_new_features_of_contours(contours_table_m_text)
+                    (cx_tabl, cy_tabl,
+                     x_min_tabl, x_max_tabl,
+                     y_min_tabl, y_max_tabl,
+                     _) = find_new_features_of_contours(contours_tab)
 
                     if len(y_min_main_tab )>0:
                         y_down_tabs=[]
@@ -2976,9 +3034,15 @@ class Eynollah:
                             y_down_tab=[]
                             y_up_tab=[]
                             for i_l in range(len(y_min_main_line)):
-                                if y_min_main_tab[i_t]>y_min_main_line[i_l] and  y_max_main_tab[i_t]>y_min_main_line[i_l] and y_min_main_tab[i_t]>y_max_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l]:
+                                if (y_min_main_tab[i_t] > y_min_main_line[i_l] and
+                                    y_max_main_tab[i_t] > y_min_main_line[i_l] and
+                                    y_min_main_tab[i_t] > y_max_main_line[i_l] and
+                                    y_max_main_tab[i_t] > y_min_main_line[i_l]):
                                     pass
-                                elif y_min_main_tab[i_t]<y_max_main_line[i_l] and y_max_main_tab[i_t]<y_max_main_line[i_l] and y_max_main_tab[i_t]<y_min_main_line[i_l] and y_min_main_tab[i_t]<y_min_main_line[i_l]:
+                                elif (y_min_main_tab[i_t] < y_max_main_line[i_l] and
+                                      y_max_main_tab[i_t] < y_max_main_line[i_l] and
+                                      y_max_main_tab[i_t] < y_min_main_line[i_l] and
+                                      y_min_main_tab[i_t] < y_min_main_line[i_l]):
                                     pass
                                 elif np.abs(y_max_main_line[i_l]-y_min_main_line[i_l])<100:
                                     pass
@@ -3280,7 +3344,8 @@ class Eynollah:
             else:
                 self.get_image_and_scales(img_org, img_res, scale)
             if self.allow_scaling:
-                img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin)
+                img_org, img_res, is_image_enhanced = \
+                    self.resize_image_with_column_classifier(is_image_enhanced, img_bin)
                 self.get_image_and_scales_after_enhancing(img_org, img_res)
         #print("enhancement in ", time.time()-t_in)
         return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified
@@ -3289,7 +3354,10 @@ class Eynollah:
         scaler_h_textline = 1#1.3  # 1.2#1.2
         scaler_w_textline = 1#1.3  # 0.9#1
         #print(image_page.shape)
-        textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier)
+        textline_mask_tot_ea, _ = self.textline_contours(image_page, True,
+                                                         scaler_h_textline,
+                                                         scaler_w_textline,
+                                                         num_col_classifier)
         if self.textline_light:
             textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16)
 
@@ -3301,7 +3369,6 @@ class Eynollah:
         #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew')
         slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True,
                                           map=self.executor.map, logger=self.logger, plotter=self.plotter)
-
         if self.plotter:
             self.plotter.save_deskewed_image(slope_deskew)
         self.logger.info("slope_deskew: %.2f°", slope_deskew)
@@ -3346,7 +3413,9 @@ class Eynollah:
             regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1
             if self.tables:
                 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
-        regions_without_separators = (text_regions_p[:, :] == 1) * 1  # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
+        regions_without_separators = (text_regions_p[:, :] == 1) * 1
+        # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1
+        #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
         #print(time.time()-t_0_box,'time box in 1')
         if self.tables:
             regions_without_separators[table_prediction ==1 ] = 1
@@ -3415,7 +3484,8 @@ class Eynollah:
 
                     pixel_line = 3
                     img_revised_tab2 = self.add_tables_heuristic_to_layout(
-                        text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables,
+                        text_regions_p_tables, boxes_d, 0, splitter_y_new_d,
+                        peaks_neg_tot_tables_d, text_regions_p_tables,
                         num_col_classifier, 0.000005, pixel_line)
                     img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(
                         img_revised_tab2, table_prediction_n, 10, num_col_classifier)
@@ -3423,7 +3493,8 @@ class Eynollah:
                     img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew)
                     img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
                     img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)
-                    img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])
+                    img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated,
+                                                              text_regions_p.shape[0], text_regions_p.shape[1])
         #print(time.time()-t_0_box,'time box in 4')
         self.logger.info("detecting boxes took %.1fs", time.time() - t1)
 
@@ -3482,11 +3553,18 @@ class Eynollah:
                 img_revised_tab = text_regions_p[:,:]
                 if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                     _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
-                        rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
+                        rotation_not_90_func(image_page, textline_mask_tot, text_regions_p,
+                                             table_prediction, slope_deskew)
 
-                    text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
-                    textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1])
-                    table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1])
+                    text_regions_p_1_n = resize_image(text_regions_p_1_n,
+                                                      text_regions_p.shape[0],
+                                                      text_regions_p.shape[1])
+                    textline_mask_tot_d = resize_image(textline_mask_tot_d,
+                                                       text_regions_p.shape[0],
+                                                       text_regions_p.shape[1])
+                    table_prediction_n = resize_image(table_prediction_n,
+                                                      text_regions_p.shape[0],
+                                                      text_regions_p.shape[1])
 
                     regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
                     regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
@@ -3502,11 +3580,18 @@ class Eynollah:
             else:
                 if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                     _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
-                        rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
+                        rotation_not_90_func(image_page, textline_mask_tot, text_regions_p,
+                                             table_prediction, slope_deskew)
 
-                    text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
-                    textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1])
-                    table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1])
+                    text_regions_p_1_n = resize_image(text_regions_p_1_n,
+                                                      text_regions_p.shape[0],
+                                                      text_regions_p.shape[1])
+                    textline_mask_tot_d = resize_image(textline_mask_tot_d,
+                                                       text_regions_p.shape[0],
+                                                       text_regions_p.shape[1])
+                    table_prediction_n = resize_image(table_prediction_n,
+                                                      text_regions_p.shape[0],
+                                                      text_regions_p.shape[1])
 
                     regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
                     regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
@@ -3565,7 +3650,8 @@ class Eynollah:
 
                     pixel_line = 3
                     img_revised_tab2 = self.add_tables_heuristic_to_layout(
-                        text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables,
+                        text_regions_p_tables, boxes_d, 0, splitter_y_new_d,
+                        peaks_neg_tot_tables_d, text_regions_p_tables,
                         num_col_classifier, 0.000005, pixel_line)
 
                     img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(
@@ -3574,8 +3660,9 @@ class Eynollah:
 
                     img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
                     img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)
-
-                    img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])
+                    img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated,
+                                                              text_regions_p.shape[0],
+                                                              text_regions_p.shape[1])
 
                 if np.abs(slope_deskew) < 0.13:
                     img_revised_tab = np.copy(img_revised_tab2[:,:,0])
@@ -3646,7 +3733,8 @@ class Eynollah:
         ##else:
             ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)
 
-        ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions)
+        ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully,
+        ###    regions_fully_np, img_only_regions)
         # plt.imshow(regions_fully[:,:,0])
         # plt.show()
         text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4
@@ -3709,7 +3797,10 @@ class Eynollah:
         
         min_cont_size_to_be_dilated = 10
         if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version:
-            cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent)
+            (cx_conts, cy_conts,
+             x_min_conts, x_max_conts,
+             y_min_conts, y_max_conts,
+             _) = find_new_features_of_contours(contours_only_text_parent)
             args_cont_located = np.array(range(len(contours_only_text_parent)))
             
             diff_y_conts = np.abs(y_max_conts[:]-y_min_conts)
@@ -3724,15 +3815,31 @@ class Eynollah:
             args_cont_located_excluded = args_cont_located[diff_x_ratio>=1.3]
             args_cont_located_included = args_cont_located[diff_x_ratio<1.3]
             
-            contours_only_text_parent_excluded = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]>=1.3]#contours_only_text_parent[diff_x_ratio>=1.3]
-            contours_only_text_parent_included = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]<1.3]#contours_only_text_parent[diff_x_ratio<1.3]
+            contours_only_text_parent_excluded = [contours_only_text_parent[ind]
+                                                  #contours_only_text_parent[diff_x_ratio>=1.3]
+                                                  for ind in range(len(contours_only_text_parent))
+                                                  if diff_x_ratio[ind]>=1.3]
+            contours_only_text_parent_included = [contours_only_text_parent[ind]
+                                                  #contours_only_text_parent[diff_x_ratio<1.3]
+                                                  for ind in range(len(contours_only_text_parent))
+                                                  if diff_x_ratio[ind]<1.3]
             
-            
-            cx_conts_excluded = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]>=1.3]#cx_conts[diff_x_ratio>=1.3]
-            cx_conts_included = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]<1.3]#cx_conts[diff_x_ratio<1.3]
-            
-            cy_conts_excluded = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]>=1.3]#cy_conts[diff_x_ratio>=1.3]
-            cy_conts_included = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]<1.3]#cy_conts[diff_x_ratio<1.3]
+            cx_conts_excluded = [cx_conts[ind]
+                                 #cx_conts[diff_x_ratio>=1.3]
+                                 for ind in range(len(cx_conts))
+                                 if diff_x_ratio[ind]>=1.3]
+            cx_conts_included = [cx_conts[ind]
+                                 #cx_conts[diff_x_ratio<1.3]
+                                 for ind in range(len(cx_conts))
+                                 if diff_x_ratio[ind]<1.3]
+            cy_conts_excluded = [cy_conts[ind]
+                                 #cy_conts[diff_x_ratio>=1.3]
+                                 for ind in range(len(cy_conts))
+                                 if diff_x_ratio[ind]>=1.3]
+            cy_conts_included = [cy_conts[ind]
+                                 #cy_conts[diff_x_ratio<1.3]
+                                 for ind in range(len(cy_conts))
+                                 if diff_x_ratio[ind]<1.3]
             
             #print(diff_x_ratio, 'ratio')
             text_regions_p = text_regions_p.astype('uint8')
@@ -3754,7 +3861,10 @@ class Eynollah:
             contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated)
             contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated)
             
-            indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = self.return_indexes_of_contours_loctaed_inside_another_list_of_contours(contours_only_dilated, contours_only_text_parent_included, cx_conts_included, cy_conts_included, args_cont_located_included)
+            indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = \
+                self.return_indexes_of_contours_located_inside_another_list_of_contours(
+                    contours_only_dilated, contours_only_text_parent_included,
+                    cx_conts_included, cy_conts_included, args_cont_located_included)
             
             
             if len(args_cont_located_excluded)>0:
@@ -3767,7 +3877,7 @@ class Eynollah:
             flattened_array = np.concatenate([arr.ravel() for arr in array_list])
             #print(len( np.unique(flattened_array)), 'indexes_of_located_cont uniques')
             
-            missing_textregions = list( set(np.array(range(len(contours_only_text_parent))) ) - set(np.unique(flattened_array)) )
+            missing_textregions = list( set(range(len(contours_only_text_parent))) - set(flattened_array) )
             #print(missing_textregions, 'missing_textregions')
 
             for ind in missing_textregions:
@@ -3887,12 +3997,13 @@ class Eynollah:
                 region_with_curr_order = ordered[ind]
                 if region_with_curr_order < len(contours_only_dilated):
                     if np.isscalar(indexes_of_located_cont[region_with_curr_order]):
-                        org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]]
+                        org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]])
                     else:
                         arg_sort_located_cont = np.argsort(center_y_coordinates_of_located[region_with_curr_order])
-                        org_contours_indexes = org_contours_indexes + list(np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) ##org_contours_indexes + list ( 
+                        org_contours_indexes.extend(
+                            np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont])
                 else:
-                    org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]]
+                    org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]])
             
             region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
             return org_contours_indexes, region_ids
@@ -3915,17 +4026,13 @@ class Eynollah:
         
         if len(peaks_real)>70:
             print(len(peaks_real), 'len(peaks_real)')
-
             peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
 
             arg_sort = np.argsort(sum_smoothed[peaks_real])
-
             arg_sort4 =arg_sort[::-1][:4]
-
             peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
 
             argsort_sorted = np.argsort(peaks_sort_4)
-
             first_4_sorted = peaks_sort_4[argsort_sorted]
             y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
             #print(first_4_sorted,'first_4_sorted')
@@ -4109,7 +4216,8 @@ class Eynollah:
 
         return x_differential_new
 
-    def filter_contours_inside_a_bigger_one(self,contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"):
+    def filter_contours_inside_a_bigger_one(self, contours, contours_d_ordered, image,
+                                            marginal_cnts=None, type_contour="textregion"):
         if type_contour=="textregion":
             areas = [cv2.contourArea(contours[j]) for j in range(len(contours))]
             area_tot = image.shape[0]*image.shape[1]
@@ -4129,7 +4237,10 @@ class Eynollah:
                 results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False)
                            for ind in contours_index_big]
                 if marginal_cnts:
-                    results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False)
+                    results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind],
+                                                             (cx_main[ind_small],
+                                                              cy_main[ind_small]),
+                                                             False)
                                         for ind in range(len(marginal_cnts))]
                     results_marginal = np.array(results_marginal)
 
@@ -4184,7 +4295,10 @@ class Eynollah:
                 args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest]
 
                 if len(args_with_bigger_area)>0:
-                    results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False)
+                    results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind],
+                                                    (cx_main_tot[ij],
+                                                     cy_main_tot[ij]),
+                                                    False)
                                for ind in args_with_bigger_area ]
                     results = np.array(results)
                     if np.any(results==1):
@@ -4196,14 +4310,16 @@ class Eynollah:
             textregion_index_to_del = np.array(textregion_index_to_del)
             textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del)
             for ind_u_a_trs in np.unique(textregion_index_to_del):
-                textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs]
+                textline_in_textregion_index_to_del_ind = \
+                    textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs]
                 textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1]
                 for ittrd in textline_in_textregion_index_to_del_ind:
                     contours[ind_u_a_trs].pop(ittrd)
 
             return contours
         
-    def return_indexes_of_contours_loctaed_inside_another_list_of_contours(self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc):
+    def return_indexes_of_contours_located_inside_another_list_of_contours(
+            self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc):
         indexes_of_located_cont = []
         center_x_coordinates_of_located = []
         center_y_coordinates_of_located = []
@@ -4217,7 +4333,8 @@ class Eynollah:
                         for ind in range(len(cy_main_loc)) ]
             results = np.array(results)
             indexes_in = np.where((results == 0) | (results == 1))
-            indexes = indexes_loc[indexes_in]# [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1))
+            # [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1))
+            indexes = indexes_loc[indexes_in]
 
             indexes_of_located_cont.append(indexes)
             center_x_coordinates_of_located.append(np.array(cx_main_loc)[indexes_in] )
@@ -4247,7 +4364,10 @@ class Eynollah:
 
         ###contours_with_textline = []
         ###for ind_tr, con_tr in enumerate(contours):
-            ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False)
+            ###results = [cv2.pointPolygonTest(con_tr,
+            ###                                 (cx_main_textline[index_textline_con],
+            ###                                  cy_main_textline[index_textline_con]),
+            ###                                 False)
         ###               for index_textline_con in range(len(contours_txtline_of_all_textregions)) ]
             ###results = np.array(results)
             ###if np.any(results==1):
@@ -4300,7 +4420,9 @@ class Eynollah:
         return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem,
                 contours_only_text_parent_rem, index_by_text_par_con_rem_sort)
     
-    def separate_marginals_to_left_and_right_and_order_from_top_to_down(self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width):
+    def separate_marginals_to_left_and_right_and_order_from_top_to_down(
+            self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals,
+            slopes_marginals, mid_point_of_page_width):
         cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours(
             polygons_of_marginals)
         
@@ -4310,8 +4432,10 @@ class Eynollah:
         poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] )
         poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] )
         
-        all_found_textline_polygons_marginals_left = list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] )
-        all_found_textline_polygons_marginals_right = list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] )
+        all_found_textline_polygons_marginals_left = \
+            list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] )
+        all_found_textline_polygons_marginals_right = \
+            list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] )
         
         all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] )
         all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] )
@@ -4322,20 +4446,38 @@ class Eynollah:
         cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width]
         cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width]
         
-        ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), key=lambda x: x[0])]
-        ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), key=lambda x: x[0])]
+        ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left),
+                                                             key=lambda x: x[0])]
+        ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right),
+                                                              key=lambda x: x[0])]
         
-        ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, all_found_textline_polygons_marginals_left), key=lambda x: x[0])]
-        ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, all_found_textline_polygons_marginals_right), key=lambda x: x[0])]
+        ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left,
+                                                                          all_found_textline_polygons_marginals_left),
+                                                                      key=lambda x: x[0])]
+        ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right,
+                                                                           all_found_textline_polygons_marginals_right),
+                                                                       key=lambda x: x[0])]
         
-        ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, all_box_coord_marginals_left), key=lambda x: x[0])]
-        ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, all_box_coord_marginals_right), key=lambda x: x[0])]
+        ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left,
+                                                                      all_box_coord_marginals_left),
+                                                                  key=lambda x: x[0])]
+        ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right,
+                                                                       all_box_coord_marginals_right),
+                                                                   key=lambda x: x[0])]
         
-        ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), key=lambda x: x[0])]
-        ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), key=lambda x: x[0])]
+        ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left),
+                                                                    key=lambda x: x[0])]
+        ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right),
+                                                                     key=lambda x: x[0])]
         
-        return ordered_left_marginals, ordered_right_marginals, ordered_left_marginals_textline, ordered_right_marginals_textline, ordered_left_marginals_bbox, ordered_right_marginals_bbox, ordered_left_slopes_marginals, ordered_right_slopes_marginals
-
+        return (ordered_left_marginals,
+                ordered_right_marginals,
+                ordered_left_marginals_textline,
+                ordered_right_marginals_textline,
+                ordered_left_marginals_bbox,
+                ordered_right_marginals_bbox,
+                ordered_left_slopes_marginals,
+                ordered_right_slopes_marginals)
 
     def run(self,
             overwrite: bool = False,
@@ -4420,9 +4562,11 @@ class Eynollah:
         self.logger.info(f"Processing file: {self.writer.image_filename}")
         self.logger.info("Step 1/5: Image Enhancement")
         
-        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
+        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = \
+            self.run_enhancement(self.light_version)
         
-        self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns")
+        self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, "
+                         f"{self.dpi} DPI, {num_col_classifier} columns")
         if is_image_enhanced:
             self.logger.info("Enhancement applied")
         
@@ -4433,7 +4577,8 @@ class Eynollah:
         if self.extract_only_images:
             self.logger.info("Step 2/5: Image Extraction Mode")
             
-            text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, image_page, page_coord, cont_page = \
+            text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, \
+                image_page, page_coord, cont_page = \
                 self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
             pcgts = self.writer.build_pagexml_no_full_layout(
                 [], page_coord, [], [], [], [],
@@ -4465,20 +4610,20 @@ class Eynollah:
             
             M_main_tot = [cv2.moments(all_found_textline_polygons[j])
                         for j in range(len(all_found_textline_polygons))]
-            w_h_textlines = [cv2.boundingRect(all_found_textline_polygons[j])[2:] for j in range(len(all_found_textline_polygons))]
+            w_h_textlines = [cv2.boundingRect(all_found_textline_polygons[j])[2:]
+                             for j in range(len(all_found_textline_polygons))]
             w_h_textlines = [w_h_textlines[j][0] / float(w_h_textlines[j][1]) for j in range(len(w_h_textlines))]
             cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
             cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
             
-            all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted(all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines)#all_found_textline_polygons[::-1]
-
-            all_found_textline_polygons=[ all_found_textline_polygons ]
-
+            all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted(
+                #all_found_textline_polygons[::-1]
+                all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines)
+            all_found_textline_polygons = [ all_found_textline_polygons ]
             all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
             all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                 all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline")
             
-            
             order_text_new = [0]
             slopes =[0]
             id_of_texts_tot =['region_0001']
@@ -4498,15 +4643,23 @@ class Eynollah:
             
             if self.ocr and not self.tr:
                 gc.collect()
-                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, textline_light=True)
+                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
+                    image_page, all_found_textline_polygons, self.prediction_model,
+                    self.b_s_ocr, self.num_to_char, textline_light=True)
             else:
                 ocr_all_textlines = None
             
             pcgts = self.writer.build_pagexml_no_full_layout(
                 cont_page, page_coord, order_text_new, id_of_texts_tot,
-                all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
-                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, 
-                cont_page, polygons_seplines, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
+                all_found_textline_polygons, page_coord, polygons_of_images,
+                polygons_of_marginals_left, polygons_of_marginals_right,
+                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
+                all_box_coord_marginals_left, all_box_coord_marginals_right,
+                slopes, slopes_marginals_left, slopes_marginals_right, 
+                cont_page, polygons_seplines, contours_tables,
+                ocr_all_textlines=ocr_all_textlines,
+                conf_contours_textregion=conf_contours_textregions,
+                skip_layout_reading_order=self.skip_layout_and_reading_order)
             self.logger.info("Basic processing complete")
             return pcgts
 
@@ -4516,7 +4669,8 @@ class Eynollah:
         
         if self.light_version:
             self.logger.info("Using light version processing")
-            text_regions_p_1 ,erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
+            text_regions_p_1 ,erosion_hurts, polygons_seplines, textline_mask_tot_ea, \
+                img_bin_light, confidence_matrix = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
             #print("text region early -2 in %.1fs", time.time() - t0)
 
@@ -4528,7 +4682,6 @@ class Eynollah:
                 img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1]
 
                 textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
-
                 slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew)
             else:
                 slope_deskew = self.run_deskew(textline_mask_tot_ea)
@@ -4537,7 +4690,8 @@ class Eynollah:
             num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
                 text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \
                     self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea,
-                                                        num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light)
+                                                        num_col_classifier, num_column_is_classified,
+                                                        erosion_hurts, img_bin_light)
             #self.logger.info("run graphics %.1fs ", time.time() - t1t)
             #print("text region early -3 in %.1fs", time.time() - t0)
             textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
@@ -4552,7 +4706,8 @@ class Eynollah:
             t1 = time.time()
             num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
                 text_regions_p_1, cont_page, table_prediction = \
-                    self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts)
+                    self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified,
+                                                  erosion_hurts)
             self.logger.info(f"Graphics detection took {time.time() - t1:.1f}s")
             #self.logger.info('cont_page %s', cont_page)
         #plt.imshow(table_prediction)
@@ -4617,13 +4772,15 @@ class Eynollah:
         ## birdan sora chock chakir
         t1 = time.time()
         if not self.full_layout:
-            polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
+            polygons_of_images, img_revised_tab, text_regions_p_1_n, \
+                textline_mask_tot_d, regions_without_separators_d, \
                 boxes, boxes_d, polygons_of_marginals, contours_tables = \
                 self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                               num_col_classifier, table_prediction, erosion_hurts)
             ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
         else:
-            polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
+            polygons_of_images, img_revised_tab, text_regions_p_1_n, \
+                textline_mask_tot_d, regions_without_separators_d, \
                 regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
                 self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                            num_col_classifier, img_only_regions, table_prediction, erosion_hurts,
@@ -4690,8 +4847,10 @@ class Eynollah:
                     areas_cnt_text_d = self.return_list_of_contours_with_desired_order(
                         areas_cnt_text_d, index_con_parents_d)
 
-                    cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d])
-                    cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d)
+                    cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = \
+                        find_new_features_of_contours([contours_biggest_d])
+                    cx_bigest_d, cy_biggest_d, _, _, _, _, _ = \
+                        find_new_features_of_contours(contours_only_text_parent_d)
                     try:
                         if len(cx_bigest_d) >= 5:
                             cx_bigest_d_last5 = cx_bigest_d[-5:]
@@ -4751,13 +4910,19 @@ class Eynollah:
                 pcgts = self.writer.build_pagexml_full_layout(
                     [], [], page_coord, [], [], [], [], [], [],
                     polygons_of_images, contours_tables, [],
-                    polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [],
+                    polygons_of_marginals, polygons_of_marginals,
+                    empty_marginals, empty_marginals,
+                    empty_marginals, empty_marginals,
+                    [], [], [], [],
                     cont_page, polygons_seplines)
             else:
                 pcgts = self.writer.build_pagexml_no_full_layout(
                     [], page_coord, [], [], [], [],
                     polygons_of_images,
-                    polygons_of_marginals, polygons_of_marginals,  empty_marginals, empty_marginals, empty_marginals, empty_marginals,  [], [], [], 
+                    polygons_of_marginals, polygons_of_marginals,
+                    empty_marginals, empty_marginals,
+                    empty_marginals, empty_marginals,
+                    [], [], [], 
                     cont_page, polygons_seplines, contours_tables)
             return pcgts
 
@@ -4767,7 +4932,8 @@ class Eynollah:
         if self.light_version:
             contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
             contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one(
-                contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals)
+                contours_only_text_parent, contours_only_text_parent_d_ordered, text_only,
+                marginal_cnts=polygons_of_marginals)
             #print("text region early 3.5 in %.1fs", time.time() - t0)
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
                 contours_only_text_parent, self.image, confidence_matrix)
@@ -4793,19 +4959,26 @@ class Eynollah:
                             polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org,
                             boxes_marginals, slope_deskew)
 
-                    #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
+                    #slopes, all_found_textline_polygons, boxes_text, txt_con_org, \
+                    #    contours_only_text_parent, index_by_text_par_con = \
                     #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons,
                     #        boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con)
-                    #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \
+                    #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, \
+                    #    polygons_of_marginals, polygons_of_marginals, _ = \
                     #    self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals,
-                    #        boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
-                    all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
+                    #        boxes_marginals, polygons_of_marginals, polygons_of_marginals,
+                    #        np.array(range(len(polygons_of_marginals))))
+                    all_found_textline_polygons = dilate_textline_contours(
+                        all_found_textline_polygons)
                     all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                         all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline")
-                    all_found_textline_polygons_marginals = dilate_textline_contours(all_found_textline_polygons_marginals)
-                    contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \
+                    all_found_textline_polygons_marginals = dilate_textline_contours(
+                        all_found_textline_polygons_marginals)
+                    contours_only_text_parent, txt_con_org, conf_contours_textregions, \
+                        all_found_textline_polygons, contours_only_text_parent_d_ordered, \
                         index_by_text_par_con = self.filter_contours_without_textline_inside(
-                            contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions)
+                            contours_only_text_parent, txt_con_org, all_found_textline_polygons,
+                            contours_only_text_parent_d_ordered, conf_contours_textregions)
                 else:
                     textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
                     all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \
@@ -4847,7 +5020,13 @@ class Eynollah:
                 all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
         
         mid_point_of_page_width = text_regions_p.shape[1] / 2.
-        polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes_marginals_left, slopes_marginals_right = self.separate_marginals_to_left_and_right_and_order_from_top_to_down(polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width)
+        (polygons_of_marginals_left, polygons_of_marginals_right,
+         all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
+         all_box_coord_marginals_left, all_box_coord_marginals_right,
+         slopes_marginals_left, slopes_marginals_right) = \
+             self.separate_marginals_to_left_and_right_and_order_from_top_to_down(
+                 polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals,
+                 slopes_marginals, mid_point_of_page_width)
         
         #print(len(polygons_of_marginals), len(ordered_left_marginals), len(ordered_right_marginals), 'marginals ordred')
         if self.full_layout:
@@ -4871,40 +5050,41 @@ class Eynollah:
                 all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \
                 contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \
                     conf_contours_textregions, conf_contours_textregions_h = fun(
-                    text_regions_p, regions_fully, contours_only_text_parent,
-                    all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered, conf_contours_textregions)
+                        text_regions_p, regions_fully, contours_only_text_parent,
+                        all_box_coord, all_found_textline_polygons,
+                        slopes, contours_only_text_parent_d_ordered, conf_contours_textregions)
 
             if self.plotter:
                 self.plotter.save_plot_of_layout(text_regions_p, image_page)
                 self.plotter.save_plot_of_layout_all(text_regions_p, image_page)
 
-            pixel_img = 4
-            polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
+            label_img = 4
+            polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, label_img)
             ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
                 ##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
                 ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
                 ##kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)
 
             if not self.reading_order_machine_based:
-                pixel_seps = 6
+                label_seps = 6
                 if not self.headers_off:
                     if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                         num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                             np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
-                            num_col_classifier, self.tables,  pixel_seps, contours_only_text_parent_h)
+                            num_col_classifier, self.tables,  label_seps, contours_only_text_parent_h)
                     else:
                         _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                             np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
-                            num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h_d_ordered)
+                            num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered)
                 elif self.headers_off:
                     if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                         num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                             np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
-                            num_col_classifier, self.tables,  pixel_seps)
+                            num_col_classifier, self.tables,  label_seps)
                     else:
                         _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                             np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
-                            num_col_classifier, self.tables, pixel_seps)
+                            num_col_classifier, self.tables, label_seps)
 
                 if num_col_classifier >= 3:
                     if np.abs(slope_deskew) < SLOPE_THRESHOLD:
@@ -4949,7 +5129,8 @@ class Eynollah:
                         contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
                 else:
                     order_text_new, id_of_texts_tot = self.do_order_of_regions(
-                        contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
+                        contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered,
+                        boxes_d, textline_mask_tot_d)
             self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
 
             if self.ocr and not self.tr:
@@ -4962,27 +5143,37 @@ class Eynollah:
                 
                 gc.collect()
                 if len(all_found_textline_polygons)>0:
-                    ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, all_found_textline_polygons, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines = None
                     
                 if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0:
-                    ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, all_found_textline_polygons_marginals_left, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines_marginals_left = None
                     
                 if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0:
-                    ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, all_found_textline_polygons_marginals_right, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines_marginals_right = None
                 
                 if all_found_textline_polygons_h and len(all_found_textline_polygons)>0:
-                    ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, all_found_textline_polygons_h, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines_h = None
                     
                 if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0:
-                    ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, polygons_of_drop_capitals, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines_drop = None
             else:
@@ -4997,9 +5188,15 @@ class Eynollah:
             pcgts = self.writer.build_pagexml_full_layout(
                 contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
-                polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right,
-                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
-                cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop,  conf_contours_textregions, conf_contours_textregions_h)
+                polygons_of_images, contours_tables, polygons_of_drop_capitals,
+                polygons_of_marginals_left, polygons_of_marginals_right,
+                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
+                all_box_coord_marginals_left, all_box_coord_marginals_right,
+                slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
+                cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h,
+                ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right,
+                ocr_all_textlines_drop,
+                conf_contours_textregions, conf_contours_textregions_h)
             
             return pcgts
 
@@ -5034,18 +5231,14 @@ class Eynollah:
 
         if self.ocr and self.tr:
             self.logger.info("Step 4.5/5: OCR Processing")
-            
             if torch.cuda.is_available():
                 self.logger.info("Using GPU acceleration")
             else:
                 self.logger.info("Using CPU processing")
-            
             if self.light_version:
                 self.logger.info("Using light version OCR")
-            
             if self.textline_light:
                 self.logger.info("Using light text line detection for OCR")
-            
             self.logger.info("Processing text lines...")
             
             device = cuda.get_current_device()
@@ -5090,7 +5283,8 @@ class Eynollah:
 
                     img_croped = img_poly_on_img[y:y+h, x:x+w, :]
                     #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
-                    text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
+                    text_ocr = self.return_ocr_of_textline_without_common_section(
+                        img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
                     ocr_textline_in_textregion.append(text_ocr)
                     ind_tot = ind_tot +1
                 ocr_all_textlines.append(ocr_textline_in_textregion)
@@ -5098,13 +5292,19 @@ class Eynollah:
         elif self.ocr and not self.tr:
             gc.collect()
             if len(all_found_textline_polygons)>0:
-                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
+                    image_page, all_found_textline_polygons, self.prediction_model,
+                    self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 
             if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0:
-                ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(
+                    image_page, all_found_textline_polygons_marginals_left, self.prediction_model,
+                    self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 
             if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0:
-                ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(
+                    image_page, all_found_textline_polygons_marginals_right, self.prediction_model,
+                    self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
 
         else:
             ocr_all_textlines = None
@@ -5117,9 +5317,14 @@ class Eynollah:
 
         pcgts = self.writer.build_pagexml_no_full_layout(
             txt_con_org, page_coord, order_text_new, id_of_texts_tot,
-            all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
-            all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, 
-            cont_page, polygons_seplines, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
+            all_found_textline_polygons, all_box_coord, polygons_of_images,
+            polygons_of_marginals_left, polygons_of_marginals_right,
+            all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
+            all_box_coord_marginals_left, all_box_coord_marginals_right,
+            slopes, slopes_marginals_left, slopes_marginals_right, 
+            cont_page, polygons_seplines, contours_tables, ocr_all_textlines,
+            ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right,
+            conf_contours_textregions)
         
         return pcgts
 
@@ -5138,7 +5343,6 @@ class Eynollah_ocr:
         min_conf_value_of_textline_text : Optional[float]=None,
         logger=None,
     ):
-        self.dir_models = dir_models
         self.model_name = model_name
         self.tr_ocr = tr_ocr
         self.export_textline_images_and_text = export_textline_images_and_text
@@ -5261,7 +5465,9 @@ class Eynollah_ocr:
                                 if child_textlines.tag.endswith("Coords"):
                                     cropped_lines_region_indexer.append(indexer_text_region)
                                     p_h=child_textlines.attrib['points'].split(' ')
-                                    textline_coords =  np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] )
+                                    textline_coords =  np.array( [ [int(x.split(',')[0]),
+                                                                    int(x.split(',')[1]) ]
+                                                                   for x in p_h] )
                                     x,y,w,h = cv2.boundingRect(textline_coords)
                                     
                                     if dir_out_image_text:
@@ -5277,9 +5483,12 @@ class Eynollah_ocr:
                                     img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                                     img_crop[mask_poly==0] = 255
                                     
-                                    self.logger.debug("processing %d lines for '%s'", len(cropped_lines), nn.attrib['id'])
+                                    self.logger.debug("processing %d lines for '%s'",
+                                                      len(cropped_lines), nn.attrib['id'])
                                     if h2w_ratio > 0.1:
-                                        cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)  )
+                                        cropped_lines.append(resize_image(img_crop,
+                                                                          tr_ocr_input_height_and_width,
+                                                                          tr_ocr_input_height_and_width)  )
                                         cropped_lines_meging_indexing.append(0)
                                         indexer_b_s+=1
                                         if indexer_b_s==self.b_s:
@@ -5288,8 +5497,10 @@ class Eynollah_ocr:
                                             indexer_b_s = 0
                                             
                                             pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                                            generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                                            generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                            generated_ids_merged = self.model_ocr.generate(
+                                                pixel_values_merged.to(self.device))
+                                            generated_text_merged = self.processor.batch_decode(
+                                                generated_ids_merged, skip_special_tokens=True)
                                             
                                             extracted_texts = extracted_texts + generated_text_merged
                                             
@@ -5297,7 +5508,9 @@ class Eynollah_ocr:
                                         splited_images, _ = return_textlines_split_if_needed(img_crop, None)
                                         #print(splited_images)
                                         if splited_images:
-                                            cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
+                                            cropped_lines.append(resize_image(splited_images[0],
+                                                                              tr_ocr_input_height_and_width,
+                                                                              tr_ocr_input_height_and_width))
                                             cropped_lines_meging_indexing.append(1)
                                             indexer_b_s+=1
                                             
@@ -5307,13 +5520,17 @@ class Eynollah_ocr:
                                                 indexer_b_s = 0
                                                 
                                                 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                                                generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                                                generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                                generated_ids_merged = self.model_ocr.generate(
+                                                    pixel_values_merged.to(self.device))
+                                                generated_text_merged = self.processor.batch_decode(
+                                                    generated_ids_merged, skip_special_tokens=True)
                                                 
                                                 extracted_texts = extracted_texts + generated_text_merged
                                             
                                             
-                                            cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
+                                            cropped_lines.append(resize_image(splited_images[1],
+                                                                              tr_ocr_input_height_and_width,
+                                                                              tr_ocr_input_height_and_width))
                                             cropped_lines_meging_indexing.append(-1)
                                             indexer_b_s+=1
                                             
@@ -5323,8 +5540,10 @@ class Eynollah_ocr:
                                                 indexer_b_s = 0
                                                 
                                                 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                                                generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                                                generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                                generated_ids_merged = self.model_ocr.generate(
+                                                    pixel_values_merged.to(self.device))
+                                                generated_text_merged = self.processor.batch_decode(
+                                                    generated_ids_merged, skip_special_tokens=True)
                                                 
                                                 extracted_texts = extracted_texts + generated_text_merged
                                                 
@@ -5339,8 +5558,10 @@ class Eynollah_ocr:
                                                 indexer_b_s = 0
                                                 
                                                 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                                                generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                                                generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                                generated_ids_merged = self.model_ocr.generate(
+                                                    pixel_values_merged.to(self.device))
+                                                generated_text_merged = self.processor.batch_decode(
+                                                    generated_ids_merged, skip_special_tokens=True)
                                                 
                                                 extracted_texts = extracted_texts + generated_text_merged
                                                 
@@ -5371,15 +5592,22 @@ class Eynollah_ocr:
                         ####n_end = (i+1)*self.b_s
                         ####imgs = cropped_lines[n_start:n_end]
                     ####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                    ####generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                    ####generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                    ####generated_ids_merged = self.model_ocr.generate(
+                    ####    pixel_values_merged.to(self.device))
+                    ####generated_text_merged = self.processor.batch_decode(
+                    ####    generated_ids_merged, skip_special_tokens=True)
                     
                     ####extracted_texts = extracted_texts + generated_text_merged
                     
                 del cropped_lines
                 gc.collect()
 
-                extracted_texts_merged = [extracted_texts[ind]  if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
+                extracted_texts_merged = [extracted_texts[ind]
+                                          if cropped_lines_meging_indexing[ind]==0
+                                          else extracted_texts[ind]+" "+extracted_texts[ind+1]
+                                          if cropped_lines_meging_indexing[ind]==1
+                                          else None
+                                          for ind in range(len(cropped_lines_meging_indexing))]
 
                 extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
                 #print(extracted_texts_merged, len(extracted_texts_merged))
@@ -5401,7 +5629,8 @@ class Eynollah_ocr:
                         w_bb = bb_ind[2]
                         h_bb = bb_ind[3]
                         
-                        font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) )
+                        font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
+                                                    font.path, w_bb, int(h_bb*0.4) )
                         
                         ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
                         
@@ -5419,25 +5648,27 @@ class Eynollah_ocr:
                 #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer')
                 #######text_by_textregion = []
                 #######for ind in unique_cropped_lines_region_indexer:
-                    #######extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind]
-                    
+                    #######ind = np.array(cropped_lines_region_indexer)==ind
+                    #######extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
                     #######text_by_textregion.append(" ".join(extracted_texts_merged_un))
                     
                 text_by_textregion = []
                 for ind in unique_cropped_lines_region_indexer:
-                    extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind]
+                    ind = np.array(cropped_lines_region_indexer) == ind
+                    extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
                     if len(extracted_texts_merged_un)>1:
                         text_by_textregion_ind = ""
                         next_glue = ""
                         for indt in range(len(extracted_texts_merged_un)):
-                            if extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-') or extracted_texts_merged_un[indt].endswith('¬'):
-                                text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt][:-1]
+                            if (extracted_texts_merged_un[indt].endswith('⸗') or
+                                extracted_texts_merged_un[indt].endswith('-') or
+                                extracted_texts_merged_un[indt].endswith('¬')):
+                                text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
                                 next_glue = ""
                             else:
-                                text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt]
+                                text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
                                 next_glue = " "
                         text_by_textregion.append(text_by_textregion_ind)
-                            
                     else:
                         text_by_textregion.append(" ".join(extracted_texts_merged_un))
                         
@@ -5495,7 +5726,9 @@ class Eynollah_ocr:
                             unicode_textregion.text = text_by_textregion[indexer_textregion]
                         indexer_textregion = indexer_textregion + 1
                         
-                ###sample_order  = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order]
+                ###sample_order  = [(id_to_order[tid], text)
+                ###                 for tid, text in zip(id_textregions, textregions_by_existing_ids)
+                ###                 if tid in id_to_order]
                 
                 ##ordered_texts_sample = [text for _, text in sorted(sample_order)]
                 ##tot_page_text = ' '.join(ordered_texts_sample)
@@ -5569,7 +5802,9 @@ class Eynollah_ocr:
                                 if child_textlines.tag.endswith("Coords"):
                                     cropped_lines_region_indexer.append(indexer_text_region)
                                     p_h=child_textlines.attrib['points'].split(' ')
-                                    textline_coords =  np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] )
+                                    textline_coords =  np.array( [ [int(x.split(',')[0]),
+                                                                    int(x.split(',')[1]) ]
+                                                                   for x in p_h] )
                                     
                                     x,y,w,h = cv2.boundingRect(textline_coords)
                                     
@@ -5601,17 +5836,19 @@ class Eynollah_ocr:
                                             img_crop[mask_poly==0] = 255
                                         
                                     else:
-                                        #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(),  mask_poly[:,:,0].sum() /float(w*h) , 'didi')
+                                        # print(file_name, angle_degrees, w*h,
+                                        #       mask_poly[:,:,0].sum(),
+                                        #       mask_poly[:,:,0].sum() /float(w*h) ,
+                                        #       'didi')
                                         
                                         if angle_degrees > 3:
                                             better_des_slope = get_orientation_moments(textline_coords)
                                             
-                                            img_crop = rotate_image_with_padding(img_crop, better_des_slope )
-                                            
+                                            img_crop = rotate_image_with_padding(img_crop, better_des_slope)
                                             if dir_in_bin is not None:
-                                                img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope )
+                                                img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope)
                                                 
-                                            mask_poly = rotate_image_with_padding(mask_poly, better_des_slope )
+                                            mask_poly = rotate_image_with_padding(mask_poly, better_des_slope)
                                             mask_poly = mask_poly.astype('uint8')
                                             
                                             #new bounding box
@@ -5622,7 +5859,6 @@ class Eynollah_ocr:
                                                 
                                             if not self.do_not_mask_with_textline_contour:
                                                 img_crop[mask_poly==0] = 255
-                                            
                                             if dir_in_bin is not None:
                                                 img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
                                                 if not self.do_not_mask_with_textline_contour:
@@ -5630,11 +5866,14 @@ class Eynollah_ocr:
                                             
                                             if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
                                                 if dir_in_bin is not None:
-                                                    img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
+                                                    img_crop, img_crop_bin = \
+                                                        break_curved_line_into_small_pieces_and_then_merge(
+                                                            img_crop, mask_poly, img_crop_bin)
                                                 else:
-                                                    img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
+                                                    img_crop, _ = \
+                                                        break_curved_line_into_small_pieces_and_then_merge(
+                                                            img_crop, mask_poly)
         
-                                                
                                         else:
                                             better_des_slope = 0
                                             if not self.do_not_mask_with_textline_contour:
@@ -5647,13 +5886,18 @@ class Eynollah_ocr:
                                             else:
                                                 if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
                                                     if dir_in_bin is not None:
-                                                        img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
+                                                        img_crop, img_crop_bin = \
+                                                            break_curved_line_into_small_pieces_and_then_merge(
+                                                                img_crop, mask_poly, img_crop_bin)
                                                     else:
-                                                        img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
+                                                        img_crop, _ = \
+                                                            break_curved_line_into_small_pieces_and_then_merge(
+                                                                img_crop, mask_poly)
                                     
                                     if not self.export_textline_images_and_text:
                                         if w_scaled < 750:#1.5*image_width:
-                                            img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
+                                            img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                img_crop, image_height, image_width)
                                             cropped_lines.append(img_fin)
                                             if abs(better_des_slope) > 45:
                                                 cropped_lines_ver_index.append(1)
@@ -5662,13 +5906,15 @@ class Eynollah_ocr:
                                                 
                                             cropped_lines_meging_indexing.append(0)
                                             if dir_in_bin is not None:
-                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
+                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                    img_crop_bin, image_height, image_width)
                                                 cropped_lines_bin.append(img_fin)
                                         else:
                                             splited_images, splited_images_bin = return_textlines_split_if_needed(
                                                 img_crop, img_crop_bin if dir_in_bin is not None else None)
                                             if splited_images:
-                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
+                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                    splited_images[0], image_height, image_width)
                                                 cropped_lines.append(img_fin)
                                                 cropped_lines_meging_indexing.append(1)
                                                 
@@ -5677,7 +5923,8 @@ class Eynollah_ocr:
                                                 else:
                                                     cropped_lines_ver_index.append(0)
                                                 
-                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
+                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                    splited_images[1], image_height, image_width)
                                                 
                                                 cropped_lines.append(img_fin)
                                                 cropped_lines_meging_indexing.append(-1)
@@ -5688,13 +5935,16 @@ class Eynollah_ocr:
                                                     cropped_lines_ver_index.append(0)
                                                 
                                                 if dir_in_bin is not None:
-                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width)
+                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                        splited_images_bin[0], image_height, image_width)
                                                     cropped_lines_bin.append(img_fin)
-                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width)
+                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                        splited_images_bin[1], image_height, image_width)
                                                     cropped_lines_bin.append(img_fin)
                                                     
                                             else:
-                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
+                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                    img_crop, image_height, image_width)
                                                 cropped_lines.append(img_fin)
                                                 cropped_lines_meging_indexing.append(0)
                                                 
@@ -5704,7 +5954,8 @@ class Eynollah_ocr:
                                                     cropped_lines_ver_index.append(0)
                                                 
                                                 if dir_in_bin is not None:
-                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
+                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                        img_crop_bin, image_height, image_width)
                                                     cropped_lines_bin.append(img_fin)
                                         
                                 if self.export_textline_images_and_text:
@@ -5716,7 +5967,8 @@ class Eynollah_ocr:
                                                 if cheild_text.tag.endswith("Unicode"):
                                                     textline_text = cheild_text.text
                                                     if textline_text:
-                                                        base_name = os.path.join(dir_out, file_name + '_line_' + str(indexer_textlines))
+                                                        base_name = os.path.join(
+                                                            dir_out, file_name + '_line_' + str(indexer_textlines))
                                                         if self.pref_of_dataset:
                                                             base_name += '_' + self.pref_of_dataset
                                                         if not self.do_not_mask_with_textline_contour:
@@ -5806,25 +6058,31 @@ class Eynollah_ocr:
                             preds_max_fliped = np.max(preds_flipped, axis=2 )
                             preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
                             pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
-                            masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
+                            masked_means_flipped = \
+                                np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \
+                                np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
                             masked_means_flipped[np.isnan(masked_means_flipped)] = 0
                             
                             preds_max = np.max(preds, axis=2 )
                             preds_max_args = np.argmax(preds, axis=2 )
                             pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
                             
-                            masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1)
+                            masked_means = \
+                                np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
+                                np.sum(pred_max_not_unk_mask_bool, axis=1)
                             masked_means[np.isnan(masked_means)] = 0
                             
                             masked_means_ver = masked_means[indices_ver]
                             #print(masked_means_ver, 'pred_max_not_unk')
                             
-                            indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0]
+                            indices_where_flipped_conf_value_is_higher = \
+                                np.where(masked_means_flipped > masked_means_ver)[0]
                             
                             #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
                             if len(indices_where_flipped_conf_value_is_higher)>0:
                                 indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
-                                preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
+                                preds[indices_to_be_replaced,:,:] = \
+                                    preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
                         if dir_in_bin is not None:
                             preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
                             
@@ -5833,35 +6091,42 @@ class Eynollah_ocr:
                                 preds_max_fliped = np.max(preds_flipped, axis=2 )
                                 preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
                                 pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
-                                masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
+                                masked_means_flipped = \
+                                    np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \
+                                    np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
                                 masked_means_flipped[np.isnan(masked_means_flipped)] = 0
                                 
                                 preds_max = np.max(preds, axis=2 )
                                 preds_max_args = np.argmax(preds, axis=2 )
                                 pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
                                 
-                                masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1)
+                                masked_means = \
+                                    np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
+                                    np.sum(pred_max_not_unk_mask_bool, axis=1)
                                 masked_means[np.isnan(masked_means)] = 0
                                 
                                 masked_means_ver = masked_means[indices_ver]
                                 #print(masked_means_ver, 'pred_max_not_unk')
                                 
-                                indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0]
+                                indices_where_flipped_conf_value_is_higher = \
+                                    np.where(masked_means_flipped > masked_means_ver)[0]
                                 
                                 #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
                                 if len(indices_where_flipped_conf_value_is_higher)>0:
                                     indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
-                                    preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
+                                    preds_bin[indices_to_be_replaced,:,:] = \
+                                        preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
                             
                             preds = (preds + preds_bin) / 2.
-                            
 
                         pred_texts = decode_batch_predictions(preds, self.num_to_char)
                         
                         preds_max = np.max(preds, axis=2 )
                         preds_max_args = np.argmax(preds, axis=2 )
                         pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
-                        masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1)
+                        masked_means = \
+                            np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
+                            np.sum(pred_max_not_unk_mask_bool, axis=1)
 
                         for ib in range(imgs.shape[0]):
                             pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
@@ -5876,31 +6141,40 @@ class Eynollah_ocr:
                         del cropped_lines_bin
                     gc.collect()
                     
-                    extracted_texts_merged = [extracted_texts[ind]  if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
+                    extracted_texts_merged = [extracted_texts[ind]
+                                              if cropped_lines_meging_indexing[ind]==0
+                                              else extracted_texts[ind]+" "+extracted_texts[ind+1]
+                                              if cropped_lines_meging_indexing[ind]==1
+                                              else None
+                                              for ind in range(len(cropped_lines_meging_indexing))]
                     
-                    extracted_conf_value_merged = [extracted_conf_value[ind]  if cropped_lines_meging_indexing[ind]==0 else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2. if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
+                    extracted_conf_value_merged = [extracted_conf_value[ind]
+                                                   if cropped_lines_meging_indexing[ind]==0
+                                                   else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2.
+                                                   if cropped_lines_meging_indexing[ind]==1
+                                                   else None
+                                                   for ind in range(len(cropped_lines_meging_indexing))]
 
-                    extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm] for ind_cfm in range(len(extracted_texts_merged)) if extracted_texts_merged[ind_cfm] is not None]
+                    extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm]
+                                                   for ind_cfm in range(len(extracted_texts_merged))
+                                                   if extracted_texts_merged[ind_cfm] is not None]
                     extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
                     unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
                     
-                    
                     if dir_out_image_text:
-                        
                         #font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
                         font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
                         with importlib_resources.as_file(font) as font:
                             font = ImageFont.truetype(font=font, size=40)
                         
                         for indexer_text, bb_ind in enumerate(total_bb_coordinates):
-                            
-                            
                             x_bb = bb_ind[0]
                             y_bb = bb_ind[1]
                             w_bb = bb_ind[2]
                             h_bb = bb_ind[3]
                             
-                            font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) )
+                            font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
+                                                        font.path, w_bb, int(h_bb*0.4) )
                             
                             ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
                             
@@ -5917,24 +6191,25 @@ class Eynollah_ocr:
 
                     text_by_textregion = []
                     for ind in unique_cropped_lines_region_indexer:
-                        extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind]
+                        ind = np.array(cropped_lines_region_indexer)==ind
+                        extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
                         if len(extracted_texts_merged_un)>1:
                             text_by_textregion_ind = ""
                             next_glue = ""
                             for indt in range(len(extracted_texts_merged_un)):
-                                if extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-') or extracted_texts_merged_un[indt].endswith('¬'):
-                                    text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt][:-1]
+                                if (extracted_texts_merged_un[indt].endswith('⸗') or
+                                    extracted_texts_merged_un[indt].endswith('-') or
+                                    extracted_texts_merged_un[indt].endswith('¬')):
+                                    text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
                                     next_glue = ""
                                 else:
-                                    text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt]
+                                    text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
                                     next_glue = " "
                             text_by_textregion.append(text_by_textregion_ind)
-                                
                         else:
                             text_by_textregion.append(" ".join(extracted_texts_merged_un))
                         #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion')
-                        
-                        
+
                     ###index_tot_regions = []
                     ###tot_region_ref = []
 
@@ -5983,7 +6258,8 @@ class Eynollah_ocr:
                                         if childtest3.tag.endswith("TextEquiv"):
                                             for child_uc in childtest3:
                                                 if child_uc.tag.endswith("Unicode"):
-                                                    childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
+                                                    childtest3.set('conf',
+                                                                   f"{extracted_conf_value_merged[indexer]:.2f}")
                                                     child_uc.text = extracted_texts_merged[indexer]
                                         
                                 indexer = indexer + 1
@@ -5999,7 +6275,9 @@ class Eynollah_ocr:
                                 unicode_textregion.text = text_by_textregion[indexer_textregion]
                             indexer_textregion = indexer_textregion + 1
                             
-                    ###sample_order  = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order]
+                    ###sample_order  = [(id_to_order[tid], text)
+                    ###                 for tid, text in zip(id_textregions, textregions_by_existing_ids)
+                    ###                 if tid in id_to_order]
                     
                     ##ordered_texts_sample = [text for _, text in sorted(sample_order)]
                     ##tot_page_text = ' '.join(ordered_texts_sample)
diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py
index f8926cf..52bf3ef 100644
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -1012,8 +1012,13 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
                        (regions_model_full[:,:,0]==2)).sum()
         pixels_main = all_pixels - pixels_header
 
-        if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ):
-            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
+        if (( pixels_header / float(pixels_main) >= 0.6 and
+              length_con[ii] / float(height_con[ii]) >= 1.3 and
+              length_con[ii] / float(height_con[ii]) <= 3 ) or
+            ( pixels_header / float(pixels_main) >= 0.3 and
+              length_con[ii] / float(height_con[ii]) >=3 )):
+
+            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 2
             contours_only_text_parent_head.append(contours_only_text_parent[ii])
             conf_contours_head.append(None) # why not conf_contours[ii], too?
             if contours_only_text_parent_d_ordered is not None:
@@ -1021,8 +1026,9 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
             all_box_coord_head.append(all_box_coord[ii])
             slopes_head.append(slopes[ii])
             all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
+
         else:
-            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
+            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 1
             contours_only_text_parent_main.append(contours_only_text_parent[ii])
             conf_contours_main.append(conf_contours[ii])
             if contours_only_text_parent_d_ordered is not None:
@@ -1883,7 +1889,8 @@ def return_boxes_of_images_by_order_of_reading_new(
                                     range(x_start_without_mother[dj],
                                           x_end_without_mother[dj]))
                             columns_not_covered = list(all_columns - columns_covered_by_mothers)
-                            y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother),
+                            y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) +
+                                                                   len(x_start_without_mother),
                                                                    dtype=int) * splitter_y_new[i])
                             ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                             ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
@@ -1938,7 +1945,8 @@ def return_boxes_of_images_by_order_of_reading_new(
                             columns_covered_by_with_child_no_mothers.update(
                                 range(x_start_with_child_without_mother[dj],
                                       x_end_with_child_without_mother[dj]))
-                        columns_not_covered_child_no_mother = list(all_columns - columns_covered_by_with_child_no_mothers)
+                        columns_not_covered_child_no_mother = list(
+                            all_columns - columns_covered_by_with_child_no_mothers)
                         #indexes_to_be_spanned=[]
                         for i_s in range(len(x_end_with_child_without_mother)):
                             columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
@@ -1948,7 +1956,8 @@ def return_boxes_of_images_by_order_of_reading_new(
                         x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int)
                         for i_s_nc in columns_not_covered_child_no_mother:
                             if i_s_nc in x_start_with_child_without_mother:
-                                x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
+                                x_end_biggest_column = \
+                                    x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
                                 args_all_biggest_lines = ind_args[(x_starting==i_s_nc) &
                                                                   (x_ending==x_end_biggest_column)]
                                 y_column_nc = y_type_2[args_all_biggest_lines]
@@ -1996,9 +2005,12 @@ def return_boxes_of_images_by_order_of_reading_new(
                                                 np.array(list(set(list(range(len(y_all_between_nm_wc)))) -
                                                               set(list(index_lines_so_close_to_top_separator))))
                                             if len(indexes_remained_after_deleting_closed_lines) > 0:
-                                                y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
-                                                x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
-                                                x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                                y_all_between_nm_wc = \
+                                                    y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                                x_starting_all_between_nm_wc = \
+                                                    x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                                x_ending_all_between_nm_wc = \
+                                                    x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
 
                                         y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c])
                                         x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc)
diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py
index 7a8926d..d41dda1 100644
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -67,7 +67,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
             peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
             neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
 
-            arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
+            arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
+                y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
             diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
 
             arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -78,11 +79,14 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
 
             clusters_to_be_deleted = []
             if len(arg_diff_cluster) > 0:
-                clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
+                clusters_to_be_deleted.append(
+                    arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
                 for i in range(len(arg_diff_cluster) - 1):
-                    clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
-                                                                          arg_diff_cluster[i + 1] + 1])
-                clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
+                    clusters_to_be_deleted.append(
+                        arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
+                                                arg_diff_cluster[i + 1] + 1])
+                clusters_to_be_deleted.append(
+                    arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
             if len(clusters_to_be_deleted) > 0:
                 peaks_new_extra = []
                 for m in range(len(clusters_to_be_deleted)):
@@ -179,7 +183,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
         peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
         neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
 
-        arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
+        arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
+            y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
         diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
         
         arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -239,7 +244,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
         
     try:
         neg_peaks_max=np.max(y_padded_smoothed[peaks])
-        arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42]
+        arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
+            y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42]
         diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
         
         arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -316,23 +322,36 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
                 
                 if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
                     point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
-                    point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
+                    point_down =y_max_cont-1
+                    ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
+                    #point_up
+                    # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
+                    ###-int(dis_to_next_down*1./4.0)
                 else:
                     point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
-                    point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
+                    point_down =y_max_cont-1
+                    ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
+                    #point_up
+                    # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
+                    ###-int(dis_to_next_down*1./4.0)
 
                 point_down_narrow = peaks[jj] + first_nonzero + int(
-                    1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
+                    1.4 * dis_to_next_down)
+                ###-int(dis_to_next_down*1./2)
             else:
                 dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
                 dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
                 
                 if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
-                    point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
-                    point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
+                    point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
+                    ##+int(dis_to_next_up*1./4.0)
+                    point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
+                    ###-int(dis_to_next_down*1./4.0)
                 else:
-                    point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
-                    point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
+                    point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
+                    ##+int(dis_to_next_up*1./4.0)
+                    point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
+                    ###-int(dis_to_next_down*1./4.0)
 
                 point_down_narrow = peaks[jj] + first_nonzero + int(
                     1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
@@ -341,7 +360,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
                 point_down_narrow = img_patch.shape[0] - 2
             
 
-            distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
+            distances = [cv2.pointPolygonTest(contour_text_interest_copy,
+                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
+                                              True)
                             for mj in range(len(xv))]
             distances = np.array(distances)
 
@@ -468,7 +489,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
                     point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
                     
             distances = [cv2.pointPolygonTest(contour_text_interest_copy,
-                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
+                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
+                                              True)
                          for mj in range(len(xv))]
             distances = np.array(distances)
 
@@ -543,7 +565,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
                 point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down)
                 
             distances = [cv2.pointPolygonTest(contour_text_interest_copy,
-                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
+                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
+                                              True)
                          for mj in range(len(xv))]
             distances = np.array(distances)
 
@@ -613,7 +636,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
 
     neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg])
 
-    arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
+    arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
+        y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
     diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
 
     arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -689,30 +713,50 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
                 dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
 
                 if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
-                    point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
-                    point_down = x_max_cont - 1  ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
+                    point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)
+                    ##+int(dis_to_next_up*1./4.0)
+                    point_down = x_max_cont - 1
+                    ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
+                    #point_up
+                    # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
+                    ###-int(dis_to_next_down*1./4.0)
                 else:
-                    point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
-                    point_down = x_max_cont - 1  ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
+                    point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)
+                    ##+int(dis_to_next_up*1./4.0)
+                    point_down = x_max_cont - 1
+                    ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
+                    #point_up
+                    # np.max(y_cont)
+                    #peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
+                    ###-int(dis_to_next_down*1./4.0)
 
-                point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
+                point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
+                ###-int(dis_to_next_down*1./2)
             else:
                 dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
                 dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
 
                 if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
-                    point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
-                    point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
+                    point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
+                    ##+int(dis_to_next_up*1./4.0)
+                    point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
+                    ###-int(dis_to_next_down*1./4.0)
                 else:
-                    point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
-                    point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
+                    point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
+                    ##+int(dis_to_next_up*1./4.0)
+                    point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
+                    ###-int(dis_to_next_down*1./4.0)
 
-                point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
+                point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
+                ###-int(dis_to_next_down*1./2)
 
             if point_down_narrow >= img_patch.shape[0]:
                 point_down_narrow = img_patch.shape[0] - 2
             
-            distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))]
+            distances = [cv2.pointPolygonTest(contour_text_interest_copy,
+                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
+                                              True)
+                         for mj in range(len(xv))]
             distances = np.array(distances)
 
             xvinside = xv[distances >= 0]
@@ -801,7 +845,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
                 point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next)
             
             distances = [cv2.pointPolygonTest(contour_text_interest_copy,
-                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
+                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
+                                              True)
                          for mj in range(len(xv))]
             distances = np.array(distances)
 
@@ -866,7 +911,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
                 point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down)
             
             distances = [cv2.pointPolygonTest(contour_text_interest_copy,
-                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
+                                              tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
+                                              True)
                          for mj in range(len(xv))]
             distances = np.array(distances)
 
@@ -950,7 +996,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
             peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
             neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
 
-            arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
+            arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
+                y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
             diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
 
             arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -963,8 +1010,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
             if len(arg_diff_cluster) > 0:
                 clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
                 for i in range(len(arg_diff_cluster) - 1):
-                    clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1])
-                clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
+                    clusters_to_be_deleted.append(
+                        arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:
+                                                arg_diff_cluster[i + 1] + 1])
+                clusters_to_be_deleted.append(
+                    arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
             if len(clusters_to_be_deleted) > 0:
                 peaks_new_extra = []
                 for m in range(len(clusters_to_be_deleted)):
@@ -1014,7 +1064,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
     try:
         neg_peaks_max = np.max(y_padded_smoothed[peaks])
 
-        arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
+        arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
+            y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
         diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
 
         arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -1290,7 +1341,9 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
 
     return None, cont_final
 
-def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
+def textline_contours_postprocessing(textline_mask, slope,
+                                     contour_text_interest, box_ind,
+                                     add_boxes_coor_into_textlines=False):
     textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
     textline_mask = textline_mask.astype(np.uint8)
     kernel = np.ones((5, 5), np.uint8)
@@ -1485,7 +1538,8 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
     onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
 
     #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
-    #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
+    #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0],
+    #             int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
     img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
 
     if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
@@ -1689,14 +1743,18 @@ def do_work_of_slopes_new_curved(
                 mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4)
 
             pixel_img = 1
-            mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par))
+            mask_biggest2 = resize_image(mask_biggest2,
+                                         int(mask_biggest2.shape[0] * scale_par),
+                                         int(mask_biggest2.shape[1] * scale_par))
             cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img)
             try:
                 textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0])
             except Exception as why:
                 logger.error(why)
     else:
-        textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True)
+        textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw,
+                                                                    slope_for_all, contour_par,
+                                                                    box_text, True)
 
     return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
 
diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py
index 5f19387..602ad6e 100644
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@@ -370,7 +370,11 @@ def return_textline_contour_with_added_box_coordinate(textline_contour,  box_ind
     return textline_contour
 
 
-def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False):
+def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons,
+                                          prediction_model,
+                                          b_s_ocr, num_to_char,
+                                          textline_light=False,
+                                          curved_line=False):
     max_len = 512
     padding_token = 299
     image_width = 512#max_len * 4
@@ -426,17 +430,23 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
                     splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
                     
                     if splited_images:
-                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
+                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0],
+                                                                               image_height,
+                                                                               image_width)
                         cropped_lines.append(img_fin)
                         cropped_lines_meging_indexing.append(1)
                         
-                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
+                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1],
+                                                                               image_height,
+                                                                               image_width)
                         
                         cropped_lines.append(img_fin)
                         cropped_lines_meging_indexing.append(-1)
                         
                     else:
-                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
+                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop,
+                                                                               image_height,
+                                                                               image_width)
                         cropped_lines.append(img_fin)
                         cropped_lines_meging_indexing.append(0)
             
@@ -469,7 +479,12 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
             pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
             extracted_texts.append(pred_texts_ib)
             
-    extracted_texts_merged = [extracted_texts[ind]  if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
+    extracted_texts_merged = [extracted_texts[ind]
+                              if cropped_lines_meging_indexing[ind]==0
+                              else extracted_texts[ind]+" "+extracted_texts[ind+1]
+                              if cropped_lines_meging_indexing[ind]==1
+                              else None
+                              for ind in range(len(cropped_lines_meging_indexing))]
 
     extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
     unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)

From b21051db21cf4c0f0e1bbf288cd4e985cc01cb7f Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 19:16:00 +0200
Subject: [PATCH 33/41] ProcessPoolExecutor: shutdown in __del__() instead of
 via atexit.register()

---
 src/eynollah/eynollah.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 2e31433..7a28478 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -260,7 +260,6 @@ class Eynollah:
 
         # for parallelization of CPU-intensive tasks:
         self.executor = ProcessPoolExecutor(max_workers=cpu_count())
-        atexit.register(self.executor.shutdown)
             
         if threshold_art_class_layout:
             self.threshold_art_class_layout = float(threshold_art_class_layout)
@@ -406,6 +405,26 @@ class Eynollah:
                 
         self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")
 
+    def __del__(self):
+        if hasattr(self, 'executor') and getattr(self, 'executor'):
+            self.executor.shutdown()
+        for model_name in ['model_page',
+                           'model_classifier',
+                           'model_bin',
+                           'model_enhancement',
+                           'model_region',
+                           'model_region_1_2',
+                           'model_region_p2',
+                           'model_region_fl_np',
+                           'model_region_fl',
+                           'model_textline',
+                           'model_reading_order',
+                           'model_table',
+                           'model_ocr',
+                           'processor']:
+            if hasattr(self, model_name) and getattr(self, model_name):
+                delattr(self, model_name)
+
     def cache_images(self, image_filename=None, image_pil=None, dpi=None):
         ret = {}
         t_c0 = time.time()

From 375e0263d4188ff5ca43037a6176544009c74e17 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 19:16:50 +0200
Subject: [PATCH 34/41] CNN-RNN OCR model: switch to 20250930 version
 (compatible with TF 2.12 on CPU as well)

---
 src/eynollah/eynollah.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 7a28478..62ce002 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -327,7 +327,7 @@ class Eynollah:
         if self.ocr and self.tr:
             self.model_ocr_dir = dir_models + "/model_eynollah_ocr_trocr_20250919"
         elif self.ocr and not self.tr:
-            self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250904"
+            self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250930"
         if self.tables:
             if self.light_version:
                 self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
@@ -5392,7 +5392,7 @@ class Eynollah_ocr:
                 if self.model_name:
                     self.model_ocr_dir = self.model_name
                 else:
-                    self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250904"
+                    self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250930"
                 model_ocr = load_model(self.model_ocr_dir , compile=False)
                 
                 self.prediction_model = tf.keras.models.Model(

From 61b20cc83d153aa0df2f5b75d6059ac80c730b3c Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 19:20:35 +0200
Subject: [PATCH 35/41] tests: switch from subtests to parametrize, use
 --isolate everywhere to free CUDA memory in between

---
 Makefile              |   2 +-
 requirements-test.txt |   2 +-
 tests/test_run.py     | 202 ++++++++++++++++++++----------------------
 3 files changed, 100 insertions(+), 106 deletions(-)

diff --git a/Makefile b/Makefile
index a920615..dd95c0a 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v
 
 OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1
 
-PYTEST_ARGS ?= -vv
+PYTEST_ARGS ?= -vv --isolate
 
 # BEGIN-EVAL makefile-parser --make-help Makefile
 
diff --git a/requirements-test.txt b/requirements-test.txt
index cce9428..3ebcf71 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,4 +1,4 @@
 pytest
-pytest-subtests
+pytest-isolate
 coverage[toml]
 black
diff --git a/tests/test_run.py b/tests/test_run.py
index be928a0..59e5099 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -20,23 +20,9 @@ MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_
 MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))
 MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
 
-def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
-    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
-    outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
-    args = [
-        '-m', MODELS_LAYOUT,
-        '-i', str(infile),
-        '-o', str(outfile.parent),
-        # subtests write to same location
-        '--overwrite',
-    ]
-    if pytestconfig.getoption('verbose') > 0:
-        args.extend(['-l', 'DEBUG'])
-    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'eynollah'
-    runner = CliRunner()
-    for options in [
+@pytest.mark.parametrize(
+    "options",
+    [
             [], # defaults
             ["--allow_scaling", "--curved-line"],
             ["--allow_scaling", "--curved-line", "--full-layout"],
@@ -47,22 +33,34 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
             # -eoi ...
             # --do_ocr
             # --skip_layout_and_reading_order
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in caplog.records]
-            assert str(infile) in logmsgs
-            assert outfile.exists()
-            tree = page_from_file(str(outfile)).etree
-            regions = tree.xpath("//page:TextRegion", namespaces=NS)
-            assert len(regions) >= 2, "result is inaccurate"
-            regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
-            assert len(regions) >= 2, "result is inaccurate"
-            lines = tree.xpath("//page:TextLine", namespaces=NS)
-            assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
+    ], ids=str)
+def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
+    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
+    outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
+    args = [
+        '-m', MODELS_LAYOUT,
+        '-i', str(infile),
+        '-o', str(outfile.parent),
+    ]
+    if pytestconfig.getoption('verbose') > 0:
+        args.extend(['-l', 'DEBUG'])
+    caplog.set_level(logging.INFO)
+    def only_eynollah(logrec):
+        return logrec.name == 'eynollah'
+    runner = CliRunner()
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    assert str(infile) in logmsgs
+    assert outfile.exists()
+    tree = page_from_file(str(outfile)).etree
+    regions = tree.xpath("//page:TextRegion", namespaces=NS)
+    assert len(regions) >= 2, "result is inaccurate"
+    regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
+    assert len(regions) >= 2, "result is inaccurate"
+    lines = tree.xpath("//page:TextLine", namespaces=NS)
+    assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
 
 def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
@@ -86,7 +84,13 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
     assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in'))
     assert len(list(outdir.iterdir())) == 2
 
-def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog):
+@pytest.mark.parametrize(
+    "options",
+    [
+            [], # defaults
+            ["--no-patches"],
+    ], ids=str)
+def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
     args = [
@@ -100,25 +104,19 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
     def only_eynollah(logrec):
         return logrec.name == 'SbbBinarizer'
     runner = CliRunner()
-    for options in [
-            [], # defaults
-            ["--no-patches"],
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in caplog.records]
-            assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
-            assert outfile.exists()
-            with Image.open(infile) as original_img:
-                original_size = original_img.size
-            with Image.open(outfile) as binarized_img:
-                binarized_size = binarized_img.size
-            assert original_size == binarized_size
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
+    assert outfile.exists()
+    with Image.open(infile) as original_img:
+        original_size = original_img.size
+    with Image.open(outfile) as binarized_img:
+        binarized_size = binarized_img.size
+    assert original_size == binarized_size
 
-def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
@@ -139,15 +137,19 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c
     assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2
     assert len(list(outdir.iterdir())) == 2
 
-def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog):
+@pytest.mark.parametrize(
+    "options",
+    [
+            [], # defaults
+            ["-sos"],
+    ], ids=str)
+def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
     args = [
         '-m', MODELS_LAYOUT,
         '-i', str(infile),
         '-o', str(outfile.parent),
-        # subtests write to same location
-        '--overwrite',
     ]
     if pytestconfig.getoption('verbose') > 0:
         args.extend(['-l', 'DEBUG'])
@@ -155,25 +157,19 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap
     def only_eynollah(logrec):
         return logrec.name == 'enhancement'
     runner = CliRunner()
-    for options in [
-            [], # defaults
-            ["-sos"],
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in caplog.records]
-            assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
-            assert outfile.exists()
-            with Image.open(infile) as original_img:
-                original_size = original_img.size
-            with Image.open(outfile) as enhanced_img:
-                enhanced_size = enhanced_img.size
-            assert (original_size == enhanced_size) == ("-sos" in options)
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
+    assert outfile.exists()
+    with Image.open(infile) as original_img:
+        original_size = original_img.size
+    with Image.open(outfile) as enhanced_img:
+        enhanced_size = enhanced_img.size
+    assert (original_size == enhanced_size) == ("-sos" in options)
 
-def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
@@ -194,7 +190,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, ca
     assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2
     assert len(list(outdir.iterdir())) == 2
 
-def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
     args = [
@@ -223,7 +219,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplo
     #assert in_order != out_order
     assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']
 
-def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
@@ -245,7 +241,15 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl
     #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
     assert len(list(outdir.iterdir())) == 2
 
-def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
+@pytest.mark.parametrize(
+    "options",
+    [
+        [], # defaults
+        ["-doit", #str(outrenderfile.parent)],
+         ],
+        ["-trocr"],
+    ], ids=str)
+def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
     outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
@@ -255,8 +259,6 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
         '-i', str(infile),
         '-dx', str(infile.parent),
         '-o', str(outfile.parent),
-        # subtests write to same location
-        '--overwrite',
     ]
     if pytestconfig.getoption('verbose') > 0:
         args.extend(['-l', 'DEBUG'])
@@ -264,33 +266,25 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
     def only_eynollah(logrec):
         return logrec.name == 'eynollah'
     runner = CliRunner()
-    for options in [
-            # kba  Fri Sep 26 12:53:49 CEST 2025
-            # Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
-            # [], # defaults
-            # ["-doit", str(outrenderfile.parent)],
-            ["-trocr"],
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in caplog.records]
-            # FIXME: ocr has no logging!
-            #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
-            assert outfile.exists()
-            if "-doit" in options:
-                assert outrenderfile.exists()
-            #in_tree = page_from_file(str(infile)).etree
-            #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
-            out_tree = page_from_file(str(outfile)).etree
-            out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
-            assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
-            assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
+    if "-doit" in options:
+        options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    # FIXME: ocr has no logging!
+    #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
+    assert outfile.exists()
+    if "-doit" in options:
+        assert outrenderfile.exists()
+    #in_tree = page_from_file(str(infile)).etree
+    #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
+    out_tree = page_from_file(str(outfile)).etree
+    out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
+    assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
+    assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
 
-@pytest.mark.skip("Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged")
-def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [

From a3d8197930b9e2c07862186d23ee192dc0347ff4 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 21:50:21 +0200
Subject: [PATCH 36/41] makefile: update model URL

---
 Makefile | 50 ++++++++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/Makefile b/Makefile
index dd95c0a..357aa47 100644
--- a/Makefile
+++ b/Makefile
@@ -13,10 +13,16 @@ DOCKER ?= docker
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
 SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
+SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
+SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)
 
 BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
+BIN_MODELFILE = $(notdir $(BIN_MODEL))
+BIN_MODELNAME := default-2021-03-09
 
-OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1
+OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
+OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
+OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
 
 PYTEST_ARGS ?= -vv --isolate
 
@@ -31,7 +37,8 @@ help:
 	@echo "    install      Install package with pip"
 	@echo "    install-dev  Install editable with pip"
 	@echo "    deps-test    Install test dependencies with pip"
-	@echo "    models       Download and extract models to $(CURDIR)/models_layout_v0_5_0"
+	@echo "    models       Download and extract models to $(CURDIR):"
+	@echo "                 $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)"
 	@echo "    smoke-test   Run simple CLI check"
 	@echo "    ocrd-test    Run OCR-D CLI check"
 	@echo "    test         Run unit tests"
@@ -42,33 +49,29 @@ help:
 	@echo "    PYTEST_ARGS  pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests. [$(PYTEST_ARGS)]"
 	@echo "    SEG_MODEL    URL of 'models' archive to download for segmentation 'test' [$(SEG_MODEL)]"
 	@echo "    BIN_MODEL    URL of 'models' archive to download for binarization 'test' [$(BIN_MODEL)]"
+	@echo "    OCR_MODEL    URL of 'models' archive to download for OCR 'test' [$(OCR_MODEL)]"
 	@echo ""
 
 # END-EVAL
 
 
 # Download and extract models to $(PWD)/models_layout_v0_5_0
-models: models_layout_v0_5_0 models_ocr_v0_5_0 default-2021-03-09
+models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
 
-models_layout_v0_5_0: models_layout_v0_5_0.tar.gz
-	tar zxf models_layout_v0_5_0.tar.gz
-
-models_layout_v0_5_0.tar.gz:
+$(BIN_MODELFILE):
+	wget -O $@ $(BIN_MODEL)
+$(SEG_MODELFILE):
 	wget -O $@ $(SEG_MODEL)
-
-models_ocr_v0_5_0: models_ocr_v0_5_0.tar.gz
-	tar zxf models_ocr_v0_5_0.tar.gz
-
-models_ocr_v0_5_0.tar.gz:
+$(OCR_MODELFILE):
 	wget -O $@ $(OCR_MODEL)
 
-default-2021-03-09: $(notdir $(BIN_MODEL))
-	unzip $(notdir $(BIN_MODEL))
+$(BIN_MODELNAME): $(BIN_MODELFILE)
 	mkdir $@
-	mv $(basename $(notdir $(BIN_MODEL))) $@
-
-$(notdir $(BIN_MODEL)):
-	wget $(BIN_MODEL)
+	unzip -d $@ $<
+$(SEG_MODELNAME): $(SEG_MODELFILE)
+	tar zxf $<
+$(OCR_MODELNAME): $(OCR_MODELFILE)
+	tar zxf $<
 
 build:
 	$(PIP) install build
@@ -82,7 +85,10 @@ install:
 install-dev:
 	$(PIP) install -e .$(and $(EXTRAS),[$(EXTRAS)])
 
-deps-test: models_layout_v0_5_0
+ifeq (OCR,$(findstring OCR, $(EXTRAS)))
+deps-test: $(OCR_MODELNAME)
+endif
+deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
 	$(PIP) install -r requirements-test.txt
 
 smoke-test: TMPDIR != mktemp -d
@@ -123,9 +129,9 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
 	$(RM) -r $(TMPDIR)
 
 # Run unit tests
-test: export MODELS_LAYOUT=$(CURDIR)/models_layout_v0_5_0
-test: export MODELS_OCR=$(CURDIR)/models_ocr_v0_5_0
-test: export MODELS_BIN=$(CURDIR)/default-2021-03-09
+test: export MODELS_LAYOUT=$(CURDIR)/$(SEG_MODELNAME)
+test: export MODELS_OCR=$(CURDIR)/$(OCR_MODELNAME)
+test: export MODELS_BIN=$(CURDIR)/$(BIN_MODELNAME)
 test:
 	$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)
 

From c86e59f481ee47ccb9938b7f6105f95f626c5f17 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 22:03:46 +0200
Subject: [PATCH 37/41] CI: update model key, split up cache restore/save

---
 .github/workflows/test-eynollah.yml | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml
index 042e508..ca213cb 100644
--- a/.github/workflows/test-eynollah.yml
+++ b/.github/workflows/test-eynollah.yml
@@ -24,17 +24,17 @@ jobs:
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
         df -h
     - uses: actions/checkout@v4
-    - uses: actions/cache@v4
+    - uses: actions/cache/restore@v4
       id: seg_model_cache
       with:
         path: models_layout_v0_5_0
-        key: ${{ runner.os }}-models
-    - uses: actions/cache@v4
+        key: ${{ runner.os }}-seg-models
+    - uses: actions/cache/restore@v4
       id: ocr_model_cache
       with:
-        path: models_ocr_v0_5_0
-        key: ${{ runner.os }}-models
-    - uses: actions/cache@v4
+        path: models_ocr_v0_5_1
+        key: ${{ runner.os }}-ocr-models
+    - uses: actions/cache/restore@v4
       id: bin_model_cache
       with:
         path: default-2021-03-09
@@ -42,6 +42,21 @@ jobs:
     - name: Download models
       if: steps.seg_model_cache.outputs.cache-hit != 'true' || steps.bin_model_cache.outputs.cache-hit != 'true' || steps.ocr_model_cache.outputs.cache-hit != true
       run: make models
+    - uses: actions/cache/save@v4
+      if: steps.seg_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: models_layout_v0_5_0
+        key: ${{ runner.os }}-seg-models
+    - uses: actions/cache/save@v4
+      if: steps.ocr_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: models_ocr_v0_5_1
+        key: ${{ runner.os }}-ocr-models
+    - uses: actions/cache/save@v4
+      if: steps.bin_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: default-2021-03-09
+        key: ${{ runner.os }}-modelbin
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v5
       with:

From ad129ed46c70b03fea7b48060e40e2451b40b975 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 22:05:53 +0200
Subject: [PATCH 38/41] CI: remove OS from model cache keys

---
 .github/workflows/test-eynollah.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml
index ca213cb..9d5b2c8 100644
--- a/.github/workflows/test-eynollah.yml
+++ b/.github/workflows/test-eynollah.yml
@@ -28,17 +28,17 @@ jobs:
       id: seg_model_cache
       with:
         path: models_layout_v0_5_0
-        key: ${{ runner.os }}-seg-models
+        key: seg-models
     - uses: actions/cache/restore@v4
       id: ocr_model_cache
       with:
         path: models_ocr_v0_5_1
-        key: ${{ runner.os }}-ocr-models
+        key: ocr-models
     - uses: actions/cache/restore@v4
       id: bin_model_cache
       with:
         path: default-2021-03-09
-        key: ${{ runner.os }}-modelbin
+        key: bin-models
     - name: Download models
       if: steps.seg_model_cache.outputs.cache-hit != 'true' || steps.bin_model_cache.outputs.cache-hit != 'true' || steps.ocr_model_cache.outputs.cache-hit != true
       run: make models
@@ -46,17 +46,17 @@ jobs:
       if: steps.seg_model_cache.outputs.cache-hit != 'true'
       with:
         path: models_layout_v0_5_0
-        key: ${{ runner.os }}-seg-models
+        key: seg-models
     - uses: actions/cache/save@v4
       if: steps.ocr_model_cache.outputs.cache-hit != 'true'
       with:
         path: models_ocr_v0_5_1
-        key: ${{ runner.os }}-ocr-models
+        key: ocr-models
     - uses: actions/cache/save@v4
       if: steps.bin_model_cache.outputs.cache-hit != 'true'
       with:
         path: default-2021-03-09
-        key: ${{ runner.os }}-modelbin
+        key: bin-models
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v5
       with:

From 7daec392b9846931b932d48fde71680ab4bf33e9 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 22:10:45 +0200
Subject: [PATCH 39/41] Dockerfile: fix up CUDA installation for mixed TF/Torch

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 4ba498b..a15776e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,6 +40,8 @@ RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename
 RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json
 # install everything and reduce image size
 RUN make install EXTRAS=OCR && rm -rf /build/eynollah
+# fixup for broken cuDNN installation (Torch pulls in 8.5.0, which is incompatible with Tensorflow)
+RUN pip install nvidia-cudnn-cu11==8.6.0.163
 # smoke test
 RUN eynollah --help
 

From f0de1adabf45f3dd70df72ddc09795a4512d5316 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 23:12:18 +0200
Subject: [PATCH 40/41] rm loky dependency

---
 .gitignore       | 4 ++++
 requirements.txt | 1 -
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 0d5d834..3cc0eac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,11 @@
 __pycache__
 sbb_newspapers_org_image/pylint.log
 models_eynollah*
+models_ocr*
+models_layout*
+default-2021-03-09
 output.html
 /build
 /dist
 *.tif
+TAGS
diff --git a/requirements.txt b/requirements.txt
index 4bc0c6a..db1d7df 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,5 +5,4 @@ scikit-learn >= 0.23.2
 tensorflow < 2.13
 numba <= 0.58.1
 scikit-image
-loky
 biopython

From 3aa7ad04fafd842fe31c36094a2b51fa43cc1bd3 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 30 Sep 2025 23:14:52 +0200
Subject: [PATCH 41/41] :memo: update changelog

---
 CHANGELOG.md | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ad9a09..f6776d6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,33 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+Fixed:
+
+ * :fire: polygons: avoid invalid paths (use `Polygon.buffer()` instead of dilation etc.)
+ * `return_boxes_of_images_by_order_of_reading_new`: avoid Numpy.dtype mismatch, simplify
+ * `return_boxes_of_images_by_order_of_reading_new`: log any exceptions instead of ignoring
+ * `filter_contours_without_textline_inside`: avoid removing from duplicate lists twice
+ * `get_marginals`: exit early if no peaks found to avoid spurious overlap mask
+ * `get_smallest_skew`: after shifting search range of rotation angle, use overall best result
+ * Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR)
+ * OCR: re-instate missing methods and fix `utils_ocr` function calls
+ * :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`)
+   (f458e3e)
+ * tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate`
+   (so CUDA memory gets freed between tests if running on GPU)
+
+Changed:
+ 
+ * polygons: slightly widen for regions and lines, increase for separators
+ * various refactorings, some code style and identifier improvements
+ * deskewing/multiprocessing: switch back to ProcessPoolExecutor (faster), 
+   but use shared memory if necessary, and switch back from `loky` to stdlib,
+   and shutdown in `del()` instead of `atexit`
+ * :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too
+ * :fire: writer: use `@type='heading'` instead of `'header'` for headings
+ * CI: update+improve model caching
+
+
 ## [0.5.0] - 2025-09-26
 
 Fixed: