Merge remote-tracking branch 'bertsky/loky-with-shm-for-175-rebuilt' into prepare-v0.6.0

Author: kba
Date:   2025-10-01 20:27:56 +02:00
Commit: 96eb1c11e6

16 changed files with 1558 additions and 1312 deletions


@@ -24,24 +24,39 @@ jobs:
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
         df -h
     - uses: actions/checkout@v4
-    - uses: actions/cache@v4
+    - uses: actions/cache/restore@v4
       id: seg_model_cache
       with:
        path: models_layout_v0_5_0
-       key: ${{ runner.os }}-models
+       key: seg-models
-    - uses: actions/cache@v4
+    - uses: actions/cache/restore@v4
      id: ocr_model_cache
      with:
-       path: models_ocr_v0_5_0
-       key: ${{ runner.os }}-models
+       path: models_ocr_v0_5_1
+       key: ocr-models
-    - uses: actions/cache@v4
+    - uses: actions/cache/restore@v4
      id: bin_model_cache
      with:
        path: default-2021-03-09
-       key: ${{ runner.os }}-modelbin
+       key: bin-models
     - name: Download models
       if: steps.seg_model_cache.outputs.cache-hit != 'true' || steps.bin_model_cache.outputs.cache-hit != 'true' || steps.ocr_model_cache.outputs.cache-hit != true
       run: make models
+    - uses: actions/cache/save@v4
+      if: steps.seg_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: models_layout_v0_5_0
+        key: seg-models
+    - uses: actions/cache/save@v4
+      if: steps.ocr_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: models_ocr_v0_5_1
+        key: ocr-models
+    - uses: actions/cache/save@v4
+      if: steps.bin_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: default-2021-03-09
+        key: bin-models
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v5
       with:

.gitignore

@@ -2,7 +2,11 @@
 __pycache__
 sbb_newspapers_org_image/pylint.log
 models_eynollah*
+models_ocr*
+models_layout*
+default-2021-03-09
 output.html
 /build
 /dist
 *.tif
+TAGS


@@ -5,6 +5,33 @@ Versioned according to [Semantic Versioning](http://semver.org/).

 ## Unreleased

+Fixed:
+
+ * :fire: polygons: avoid invalid paths (use `Polygon.buffer()` instead of dilation etc.)
+ * `return_boxes_of_images_by_order_of_reading_new`: avoid Numpy.dtype mismatch, simplify
+ * `return_boxes_of_images_by_order_of_reading_new`: log any exceptions instead of ignoring them
+ * `filter_contours_without_textline_inside`: avoid removing from duplicate lists twice
+ * `get_marginals`: exit early if no peaks are found, to avoid a spurious overlap mask
+ * `get_smallest_skew`: after shifting the search range of the rotation angle, use the overall best result
+ * Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR)
+ * OCR: re-instate missing methods and fix `utils_ocr` function calls
+ * :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`) (f458e3e)
+ * tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate`
+   (so CUDA memory gets freed between tests if running on GPU)
+
+Changed:
+
+ * polygons: slightly widen for regions and lines, increase for separators
+ * various refactorings, some code style and identifier improvements
+ * deskewing/multiprocessing: switch back to `ProcessPoolExecutor` (faster),
+   but use shared memory if necessary, and switch back from `loky` to stdlib,
+   and shutdown in `del()` instead of `atexit` (see the sketch after this excerpt)
+ * :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too
+ * :fire: writer: use `@type='heading'` instead of `'header'` for headings
+ * CI: update and improve model caching
+
 ## [0.5.0] - 2025-09-26

 Fixed:
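The deskewing/multiprocessing entry only names the approach. Below is a minimal, self-contained sketch of that pattern (illustrative names, not the actual eynollah implementation): a stdlib ProcessPoolExecutor whose workers attach to a shared-memory array instead of pickling it, with shutdown performed in __del__ rather than via atexit.

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import shared_memory
import numpy as np

def _row_sum(shm_name, shape, dtype, row):
    # worker: attach to the existing shared block instead of receiving the whole array
    shm = shared_memory.SharedMemory(name=shm_name)
    try:
        arr = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
        return float(arr[row].sum())
    finally:
        shm.close()

class SharedArrayPool:
    def __init__(self, image):
        self.shm = shared_memory.SharedMemory(create=True, size=image.nbytes)
        self.arr = np.ndarray(image.shape, dtype=image.dtype, buffer=self.shm.buf)
        self.arr[:] = image
        self.executor = ProcessPoolExecutor(max_workers=2)

    def row_sums(self):
        futures = [self.executor.submit(_row_sum, self.shm.name,
                                        self.arr.shape, self.arr.dtype.name, r)
                   for r in range(self.arr.shape[0])]
        return [f.result() for f in futures]

    def __del__(self):
        # shut down in __del__ (not atexit), then release the shared block
        self.executor.shutdown(wait=False)
        self.arr = None          # drop the buffer reference before closing
        self.shm.close()
        self.shm.unlink()

if __name__ == '__main__':
    pool = SharedArrayPool(np.ones((4, 1000), dtype=np.uint8))
    print(pool.row_sums())       # [1000.0, 1000.0, 1000.0, 1000.0]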


@@ -40,6 +40,8 @@ RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename
 RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json
 # install everything and reduce image size
 RUN make install EXTRAS=OCR && rm -rf /build/eynollah
+# fixup for broken cuDNN installation (Torch pulls in 8.5.0, which is incompatible with Tensorflow)
+RUN pip install nvidia-cudnn-cu11==8.6.0.163
 # smoke test
 RUN eynollah --help


@@ -13,12 +13,18 @@ DOCKER ?= docker
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
 SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
+SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
+SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)

 BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
+BIN_MODELFILE = $(notdir $(BIN_MODEL))
+BIN_MODELNAME := default-2021-03-09

-OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1
+OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
+OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
+OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)

-PYTEST_ARGS ?= -vv
+PYTEST_ARGS ?= -vv --isolate

 # BEGIN-EVAL makefile-parser --make-help Makefile

@@ -31,7 +37,8 @@ help:
 	@echo "    install        Install package with pip"
 	@echo "    install-dev    Install editable with pip"
 	@echo "    deps-test      Install test dependencies with pip"
-	@echo "    models         Download and extract models to $(CURDIR)/models_layout_v0_5_0"
+	@echo "    models         Download and extract models to $(CURDIR):"
+	@echo "                   $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)"
 	@echo "    smoke-test     Run simple CLI check"
 	@echo "    ocrd-test      Run OCR-D CLI check"
 	@echo "    test           Run unit tests"

@@ -42,33 +49,29 @@ help:
 	@echo "    PYTEST_ARGS    pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests. [$(PYTEST_ARGS)]"
 	@echo "    SEG_MODEL      URL of 'models' archive to download for segmentation 'test' [$(SEG_MODEL)]"
 	@echo "    BIN_MODEL      URL of 'models' archive to download for binarization 'test' [$(BIN_MODEL)]"
+	@echo "    OCR_MODEL      URL of 'models' archive to download for binarization 'test' [$(OCR_MODEL)]"
 	@echo ""

 # END-EVAL

 # Download and extract models to $(PWD)/models_layout_v0_5_0
-models: models_layout_v0_5_0 models_ocr_v0_5_0 default-2021-03-09
+models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)

-models_layout_v0_5_0: models_layout_v0_5_0.tar.gz
-	tar zxf models_layout_v0_5_0.tar.gz
-
-models_layout_v0_5_0.tar.gz:
+$(BIN_MODELFILE):
+	wget -O $@ $(BIN_MODEL)
+$(SEG_MODELFILE):
 	wget -O $@ $(SEG_MODEL)
-
-models_ocr_v0_5_0: models_ocr_v0_5_0.tar.gz
-	tar zxf models_ocr_v0_5_0.tar.gz
-
-models_ocr_v0_5_0.tar.gz:
+$(OCR_MODELFILE):
 	wget -O $@ $(OCR_MODEL)

-default-2021-03-09: $(notdir $(BIN_MODEL))
-	unzip $(notdir $(BIN_MODEL))
+$(BIN_MODELNAME): $(BIN_MODELFILE)
 	mkdir $@
-	mv $(basename $(notdir $(BIN_MODEL))) $@
-
-$(notdir $(BIN_MODEL)):
-	wget $(BIN_MODEL)
+	unzip -d $@ $<
+$(SEG_MODELNAME): $(SEG_MODELFILE)
+	tar zxf $<
+$(OCR_MODELNAME): $(OCR_MODELFILE)
+	tar zxf $<

 build:
 	$(PIP) install build

@@ -82,7 +85,10 @@ install:
 install-dev:
 	$(PIP) install -e .$(and $(EXTRAS),[$(EXTRAS)])

-deps-test: models_layout_v0_5_0
+ifeq (OCR,$(findstring OCR, $(EXTRAS)))
+deps-test: $(OCR_MODELNAME)
+endif
+deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
 	$(PIP) install -r requirements-test.txt

 smoke-test: TMPDIR != mktemp -d

@@ -123,9 +129,9 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
 	$(RM) -r $(TMPDIR)

 # Run unit tests
-test: export MODELS_LAYOUT=$(CURDIR)/models_layout_v0_5_0
-test: export MODELS_OCR=$(CURDIR)/models_ocr_v0_5_0
-test: export MODELS_BIN=$(CURDIR)/default-2021-03-09
+test: export MODELS_LAYOUT=$(CURDIR)/$(SEG_MODELNAME)
+test: export MODELS_OCR=$(CURDIR)/$(OCR_MODELNAME)
+test: export MODELS_BIN=$(CURDIR)/$(BIN_MODELNAME)
 test:
 	$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)


@@ -1,4 +1,4 @@
 pytest
-pytest-subtests
+pytest-isolate
 coverage[toml]
 black


@@ -5,5 +5,4 @@ scikit-learn >= 0.23.2
 tensorflow < 2.13
 numba <= 0.58.1
 scikit-image
-loky
 biopython

File diff suppressed because it is too large


@@ -1,3 +1,5 @@
+from typing import Tuple
+from logging import getLogger
 import time
 import math

@@ -298,9 +300,17 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
             x_end_with_child_without_mother,
             new_main_sep_y)

+def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
+    return (box[1], box[1] + box[3],
+            box[0], box[0] + box[2])
+
+def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]:
+    return (slice(box[1], box[1] + box[3]),
+            slice(box[0], box[0] + box[2]))
+
 def crop_image_inside_box(box, img_org_copy):
-    image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
-    return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
+    image_box = img_org_copy[box2slice(box)]
+    return image_box, box2rect(box)

 def otsu_copy_binary(img):
     img_r = np.zeros((img.shape[0], img.shape[1], 3))
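As an aside, a tiny usage sketch of the two helpers introduced above (their definitions copied here so it runs standalone): boxes come in OpenCV order (x, y, width, height); box2slice converts to row/column slices, box2rect to (y0, y1, x0, x1).

from typing import Tuple
import numpy as np

def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
    return (box[1], box[1] + box[3], box[0], box[0] + box[2])

def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]:
    return (slice(box[1], box[1] + box[3]), slice(box[0], box[0] + box[2]))

box = (10, 20, 30, 40)            # x=10, y=20, width=30, height=40
img = np.zeros((100, 100, 3), dtype=np.uint8)
crop = img[box2slice(box)]        # rows 20:60, columns 10:40
assert crop.shape[:2] == (40, 30)
assert box2rect(box) == (20, 60, 10, 40)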
@@ -373,6 +383,10 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
     return np.std(z)

 def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
+    if not regions_without_separators.any():
+        return 0, []
+    #plt.imshow(regions_without_separators)
+    #plt.show()
     regions_without_separators_0 = regions_without_separators.sum(axis=0)
     ##plt.plot(regions_without_separators_0)
     ##plt.show()

@@ -392,6 +406,9 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
     zneg = gaussian_filter1d(zneg, sigma_)

     peaks_neg, _ = find_peaks(zneg, height=0)
+    #plt.plot(zneg)
+    #plt.plot(peaks_neg, zneg[peaks_neg], 'rx')
+    #plt.show()
     peaks, _ = find_peaks(z, height=0)
     peaks_neg = peaks_neg - 10 - 10

@@ -406,9 +423,13 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
                           (peaks_neg < (regions_without_separators.shape[1] - 370))]
     interest_pos = z[peaks]
     interest_pos = interest_pos[interest_pos > 10]
+    if not interest_pos.any():
+        return 0, []
     # plt.plot(z)
     # plt.show()
     interest_neg = z[peaks_neg]
+    if not interest_neg.any():
+        return 0, []

     min_peaks_pos = np.min(interest_pos)
     max_peaks_pos = np.max(interest_pos)

@@ -955,11 +976,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
         regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom,
                                                              regions_model_full.shape[0] // zoom),
                                         interpolation=cv2.INTER_NEAREST)
-        contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent]
+        contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent]
         ###
     cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
-        find_new_features_of_contours(contours_only_text_parent)
+        find_new_features_of_contours(contours_only_text_parent_z)

     length_con=x_max_main-x_min_main
     height_con=y_max_main-y_min_main

@@ -982,8 +1003,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     contours_only_text_parent_main_d=[]
     contours_only_text_parent_head_d=[]

-    for ii in range(len(contours_only_text_parent)):
-        con=contours_only_text_parent[ii]
+    for ii, con in enumerate(contours_only_text_parent_z):
         img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3))
         img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))

@@ -992,25 +1012,30 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
                          (regions_model_full[:,:,0]==2)).sum()
         pixels_main = all_pixels - pixels_header

-        if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ):
-            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
-            contours_only_text_parent_head.append(con)
+        if (( pixels_header / float(pixels_main) >= 0.6 and
+              length_con[ii] / float(height_con[ii]) >= 1.3 and
+              length_con[ii] / float(height_con[ii]) <= 3 ) or
+            ( pixels_header / float(pixels_main) >= 0.3 and
+              length_con[ii] / float(height_con[ii]) >= 3 )):
+            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 2
+            contours_only_text_parent_head.append(contours_only_text_parent[ii])
+            conf_contours_head.append(None) # why not conf_contours[ii], too?
             if contours_only_text_parent_d_ordered is not None:
                 contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
             all_box_coord_head.append(all_box_coord[ii])
             slopes_head.append(slopes[ii])
             all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
-            conf_contours_head.append(None)
         else:
-            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
-            contours_only_text_parent_main.append(con)
+            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 1
+            contours_only_text_parent_main.append(contours_only_text_parent[ii])
             conf_contours_main.append(conf_contours[ii])
             if contours_only_text_parent_d_ordered is not None:
                 contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
             all_box_coord_main.append(all_box_coord[ii])
             slopes_main.append(slopes[ii])
             all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
         #print(all_pixels,pixels_main,pixels_header)

     ### to make it faster

@@ -1018,8 +1043,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     #     regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
     #                                           regions_model_full.shape[0] // zoom),
     #                                     interpolation=cv2.INTER_NEAREST)
-    contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
-    contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
     ###

     return (regions_model_1,

@@ -1626,12 +1649,19 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,

 def return_boxes_of_images_by_order_of_reading_new(
         splitter_y_new, regions_without_separators,
         matrix_of_lines_ch,
-        num_col_classifier, erosion_hurts, tables, right2left_readingorder):
+        num_col_classifier, erosion_hurts, tables,
+        right2left_readingorder,
+        logger=None):

     if right2left_readingorder:
         regions_without_separators = cv2.flip(regions_without_separators,1)
+    if logger is None:
+        logger = getLogger(__package__)
+    logger.debug('enter return_boxes_of_images_by_order_of_reading_new')
     boxes=[]
     peaks_neg_tot_tables = []
+    splitter_y_new = np.array(splitter_y_new, dtype=int)
     for i in range(len(splitter_y_new)-1):
         #print(splitter_y_new[i],splitter_y_new[i+1])
         matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) &

@@ -1644,24 +1674,19 @@ def return_boxes_of_images_by_order_of_reading_new(
         #           0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))):
         if True:
             try:
-                if erosion_hurts:
-                    num_col, peaks_neg_fin = find_num_col(
-                        regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
-                        num_col_classifier, tables, multiplier=6.)
-                else:
-                    num_col, peaks_neg_fin = find_num_col(
-                        regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
-                        num_col_classifier, tables, multiplier=7.)
+                num_col, peaks_neg_fin = find_num_col(
+                    regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
+                    num_col_classifier, tables, multiplier=6. if erosion_hurts else 7.)
             except:
                 peaks_neg_fin=[]
                 num_col = 0
             try:
-                peaks_neg_fin_org=np.copy(peaks_neg_fin)
                 if (len(peaks_neg_fin)+1)<num_col_classifier or num_col_classifier==6:
                     #print('burda')
+                    peaks_neg_fin_org = np.copy(peaks_neg_fin)
                     if len(peaks_neg_fin)==0:
                         num_col, peaks_neg_fin = find_num_col(
-                            regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
+                            regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
                             num_col_classifier, tables, multiplier=3.)
                     peaks_neg_fin_early=[]
                     peaks_neg_fin_early.append(0)

@@ -1674,21 +1699,21 @@ def return_boxes_of_images_by_order_of_reading_new(
                     peaks_neg_fin_rev=[]
                     for i_n in range(len(peaks_neg_fin_early)-1):
                         #print(i_n,'i_n')
-                        #plt.plot(regions_without_separators[int(splitter_y_new[i]):
-                        #                                    int(splitter_y_new[i+1]),
+                        #plt.plot(regions_without_separators[splitter_y_new[i]:
+                        #                                    splitter_y_new[i+1],
                         #                                    peaks_neg_fin_early[i_n]:
                         #                                    peaks_neg_fin_early[i_n+1]].sum(axis=0) )
                         #plt.show()
                         try:
                             num_col, peaks_neg_fin1 = find_num_col(
-                                regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
+                                regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
                                                            peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
                                 num_col_classifier,tables, multiplier=7.)
                         except:
                             peaks_neg_fin1=[]
                         try:
                             num_col, peaks_neg_fin2 = find_num_col(
-                                regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
+                                regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
                                                            peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
                                 num_col_classifier,tables, multiplier=5.)
                         except:

@@ -1714,9 +1739,9 @@ def return_boxes_of_images_by_order_of_reading_new(
                         #print(peaks_neg_fin,'peaks_neg_fin')
             except:
-                pass
+                logger.exception("cannot find peaks consistent with columns")
                 #num_col, peaks_neg_fin = find_num_col(
-                #    regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
+                #    regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],:],
                 #    multiplier=7.0)
             x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
             x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]

@@ -1738,31 +1763,28 @@ def return_boxes_of_images_by_order_of_reading_new(
                 y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
                 new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
                     x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff)
+            x_starting = np.array(x_starting)
+            x_ending = np.array(x_ending)
+            y_type_2 = np.array(y_type_2)
+            y_diff_type_2 = np.array(y_diff_type_2)
+            all_columns = set(range(len(peaks_neg_tot) - 1))
             if ((reading_order_type==1) or
                 (reading_order_type==0 and
                  (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))):
                 try:
-                    y_grenze=int(splitter_y_new[i])+300
+                    y_grenze = splitter_y_new[i] + 300
                     #check if there is a big separator in this y_mains_sep_ohne_grenzen

                     args_early_ys=np.arange(len(y_type_2))
                     #print(args_early_ys,'args_early_ys')
-                    #print(int(splitter_y_new[i]),int(splitter_y_new[i+1]))
+                    #print(splitter_y_new[i], splitter_y_new[i+1])

-                    x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) &
+                    x_starting_up = x_starting[(y_type_2 > splitter_y_new[i]) &
                                                (y_type_2 <= y_grenze)]
-                    x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) &
+                    x_ending_up = x_ending[(y_type_2 > splitter_y_new[i]) &
                                            (y_type_2 <= y_grenze)]
-                    y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                    y_type_2_up = y_type_2[(y_type_2 > splitter_y_new[i]) &
                                            (y_type_2 <= y_grenze)]
-                    y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                    y_diff_type_2_up = y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
                                                      (y_type_2 <= y_grenze)]
-                    args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) &
+                    args_up = args_early_ys[(y_type_2 > splitter_y_new[i]) &
                                             (y_type_2 <= y_grenze)]
                     if len(y_type_2_up) > 0:
                         y_main_separator_up = y_type_2_up [(x_starting_up==0) &

@@ -1776,8 +1798,8 @@ def return_boxes_of_images_by_order_of_reading_new(
                             args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
                             #print(args_to_be_kept,'args_to_be_kept')
                             boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
-                                          int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))])
-                            splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0]
+                                          splitter_y_new[i], y_diff_main_separator_up.max()])
+                            splitter_y_new[i] = y_diff_main_separator_up.max()
                             #print(splitter_y_new[i],'splitter_y_new[i]')

                             y_type_2 = y_type_2[args_to_be_kept]

@@ -1786,29 +1808,28 @@ def return_boxes_of_images_by_order_of_reading_new(
                             y_diff_type_2 = y_diff_type_2[args_to_be_kept]

                         #print('galdiha')
-                        y_grenze=int(splitter_y_new[i])+200
+                        y_grenze = splitter_y_new[i] + 200
                         args_early_ys2=np.arange(len(y_type_2))
-                        y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                        y_type_2_up=y_type_2[(y_type_2 > splitter_y_new[i]) &
                                              (y_type_2 <= y_grenze)]
-                        x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) &
+                        x_starting_up=x_starting[(y_type_2 > splitter_y_new[i]) &
                                                  (y_type_2 <= y_grenze)]
-                        x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) &
+                        x_ending_up=x_ending[(y_type_2 > splitter_y_new[i]) &
                                              (y_type_2 <= y_grenze)]
-                        y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                        y_diff_type_2_up=y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
                                                        (y_type_2 <= y_grenze)]
-                        args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) &
+                        args_up2=args_early_ys2[(y_type_2 > splitter_y_new[i]) &
                                                 (y_type_2 <= y_grenze)]
                         #print(y_type_2_up,x_starting_up,x_ending_up,'didid')
-                        nodes_in = []
+                        nodes_in = set()
                         for ij in range(len(x_starting_up)):
-                            nodes_in = nodes_in + list(range(int(x_starting_up[ij]),
-                                                             int(x_ending_up[ij])))
-                        nodes_in = np.unique(nodes_in)
+                            nodes_in.update(range(x_starting_up[ij],
+                                                  x_ending_up[ij]))
                         #print(nodes_in,'nodes_in')

-                        if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
+                        if nodes_in == set(range(len(peaks_neg_tot)-1)):
                             pass
-                        elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)):
+                        elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
                             pass
                         else:
                             #print('burdaydikh')

@@ -1823,17 +1844,16 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 pass
                                 #print('burdaydikh2')
                     elif len(y_diff_main_separator_up)==0:
-                        nodes_in = []
+                        nodes_in = set()
                         for ij in range(len(x_starting_up)):
-                            nodes_in = nodes_in + list(range(int(x_starting_up[ij]),
-                                                             int(x_ending_up[ij])))
-                        nodes_in = np.unique(nodes_in)
+                            nodes_in.update(range(x_starting_up[ij],
+                                                  x_ending_up[ij]))
                         #print(nodes_in,'nodes_in2')
                         #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')

-                        if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
+                        if nodes_in == set(range(len(peaks_neg_tot)-1)):
                             pass
-                        elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)):
+                        elif nodes_in == set(range(1,len(peaks_neg_tot)-1)):
                             pass
                         else:
                             #print('burdaydikh')
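The refactor in the two hunks above replaces list concatenation plus np.unique with a plain set. For illustration, a standalone sketch of the same column-coverage test (variable names are made up):

# Which columns are covered by the given (start, end) separator spans?
spans = [(0, 2), (2, 5)]
n_columns = 5

covered = set()
for start, end in spans:
    covered.update(range(start, end))         # set.update replaces list + np.unique

print(covered == set(range(n_columns)))       # True: all columns covered
print(covered == set(range(1, n_columns)))    # False: not "all but the first"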
@@ -1858,26 +1878,25 @@ def return_boxes_of_images_by_order_of_reading_new(
                 x_end_by_order=[]
                 if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1:
                     if reading_order_type==1:
-                        y_lines_by_order.append(int(splitter_y_new[i]))
+                        y_lines_by_order.append(splitter_y_new[i])
                         x_start_by_order.append(0)
                         x_end_by_order.append(len(peaks_neg_tot)-2)
                     else:
                         #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                        columns_covered_by_mothers = []
+                        columns_covered_by_mothers = set()
                         for dj in range(len(x_start_without_mother)):
-                            columns_covered_by_mothers = columns_covered_by_mothers + \
-                                list(range(int(x_start_without_mother[dj]),
-                                           int(x_end_without_mother[dj])))
-                        columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                        all_columns=np.arange(len(peaks_neg_tot)-1)
-                        columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
-                        y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
-                        ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                            columns_covered_by_mothers.update(
+                                range(x_start_without_mother[dj],
+                                      x_end_without_mother[dj]))
+                        columns_not_covered = list(all_columns - columns_covered_by_mothers)
+                        y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) +
+                                                               len(x_start_without_mother),
+                                                               dtype=int) * splitter_y_new[i])
+                        ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                         ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                        x_starting = np.append(x_starting, columns_not_covered)
+                        x_starting = np.append(x_starting, np.array(columns_not_covered, int))
                         x_starting = np.append(x_starting, x_start_without_mother)
-                        x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                        x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                         x_ending = np.append(x_ending, x_end_without_mother)

                         ind_args=np.arange(len(y_type_2))

@@ -1906,42 +1925,39 @@ def return_boxes_of_images_by_order_of_reading_new(
                             x_end_by_order.append(x_end_column_sort[ii]-1)
                 else:
                     #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                    columns_covered_by_mothers = []
+                    columns_covered_by_mothers = set()
                     for dj in range(len(x_start_without_mother)):
-                        columns_covered_by_mothers = columns_covered_by_mothers + \
-                            list(range(int(x_start_without_mother[dj]),
-                                       int(x_end_without_mother[dj])))
-                    columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                    all_columns=np.arange(len(peaks_neg_tot)-1)
-                    columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
-                    y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
-                    ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                        columns_covered_by_mothers.update(
+                            range(x_start_without_mother[dj],
+                                  x_end_without_mother[dj]))
+                    columns_not_covered = list(all_columns - columns_covered_by_mothers)
+                    y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother),
+                                                           dtype=int) * splitter_y_new[i])
+                    ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                     ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                    x_starting = np.append(x_starting, columns_not_covered)
+                    x_starting = np.append(x_starting, np.array(columns_not_covered, int))
                     x_starting = np.append(x_starting, x_start_without_mother)
-                    x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                    x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                     x_ending = np.append(x_ending, x_end_without_mother)

-                    columns_covered_by_with_child_no_mothers = []
+                    columns_covered_by_with_child_no_mothers = set()
                     for dj in range(len(x_end_with_child_without_mother)):
-                        columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \
-                            list(range(int(x_start_with_child_without_mother[dj]),
-                                       int(x_end_with_child_without_mother[dj])))
-                    columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers))
-
-                    all_columns = np.arange(len(peaks_neg_tot)-1)
-                    columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers))
+                        columns_covered_by_with_child_no_mothers.update(
+                            range(x_start_with_child_without_mother[dj],
+                                  x_end_with_child_without_mother[dj]))
+                    columns_not_covered_child_no_mother = list(
+                        all_columns - columns_covered_by_with_child_no_mothers)
                     #indexes_to_be_spanned=[]
                     for i_s in range(len(x_end_with_child_without_mother)):
                         columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
                     columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother)
                     ind_args = np.arange(len(y_type_2))
-                    x_end_with_child_without_mother = np.array(x_end_with_child_without_mother)
-                    x_start_with_child_without_mother = np.array(x_start_with_child_without_mother)
+                    x_end_with_child_without_mother = np.array(x_end_with_child_without_mother, int)
+                    x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int)
                     for i_s_nc in columns_not_covered_child_no_mother:
                         if i_s_nc in x_start_with_child_without_mother:
-                            x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
+                            x_end_biggest_column = \
+                                x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
                             args_all_biggest_lines = ind_args[(x_starting==i_s_nc) &
                                                               (x_ending==x_end_biggest_column)]
                             y_column_nc = y_type_2[args_all_biggest_lines]

@@ -1951,7 +1967,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                             for i_c in range(len(y_column_nc)):
                                 if i_c==(len(y_column_nc)-1):
                                     ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) &
-                                                                         (y_type_2<int(splitter_y_new[i+1])) &
+                                                                         (y_type_2<splitter_y_new[i+1]) &
                                                                          (x_starting>=i_s_nc) &
                                                                          (x_ending<=x_end_biggest_column)]
                                 else:

@@ -1967,21 +1983,19 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 if len(x_diff_all_between_nm_wc)>0:
                                     biggest=np.argmax(x_diff_all_between_nm_wc)

-                                columns_covered_by_mothers = []
+                                columns_covered_by_mothers = set()
                                 for dj in range(len(x_starting_all_between_nm_wc)):
-                                    columns_covered_by_mothers = columns_covered_by_mothers + \
-                                        list(range(int(x_starting_all_between_nm_wc[dj]),
-                                                   int(x_ending_all_between_nm_wc[dj])))
-                                columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                                all_columns=np.arange(i_s_nc, x_end_biggest_column)
-                                columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
+                                    columns_covered_by_mothers.update(
+                                        range(x_starting_all_between_nm_wc[dj],
+                                              x_ending_all_between_nm_wc[dj]))
+                                child_columns = set(range(i_s_nc, x_end_biggest_column))
+                                columns_not_covered = list(child_columns - columns_covered_by_mothers)

                                 should_longest_line_be_extended=0
                                 if (len(x_diff_all_between_nm_wc) > 0 and
-                                    set(list(range(int(x_starting_all_between_nm_wc[biggest]),
-                                                   int(x_ending_all_between_nm_wc[biggest]))) +
-                                        list(columns_not_covered)) != set(all_columns)):
+                                    set(list(range(x_starting_all_between_nm_wc[biggest],
+                                                   x_ending_all_between_nm_wc[biggest])) +
+                                        list(columns_not_covered)) != child_columns):
                                     should_longest_line_be_extended=1

                                     index_lines_so_close_to_top_separator = \
                                         np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) &

@@ -1991,9 +2005,12 @@ def return_boxes_of_images_by_order_of_reading_new(
                                         np.array(list(set(list(range(len(y_all_between_nm_wc)))) -
                                                       set(list(index_lines_so_close_to_top_separator))))
                                     if len(indexes_remained_after_deleting_closed_lines) > 0:
-                                        y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
-                                        x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
-                                        x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                        y_all_between_nm_wc = \
+                                            y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                        x_starting_all_between_nm_wc = \
+                                            x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                        x_ending_all_between_nm_wc = \
+                                            x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]

                                     y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c])
                                     x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc)

@@ -2005,11 +2022,11 @@ def return_boxes_of_images_by_order_of_reading_new(
                                         x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
                                         x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
                                     except:
-                                        pass
+                                        logger.exception("cannot append")
                                 y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
-                                x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered)
-                                x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1)
+                                x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
+                                x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)

                                 ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
                                 for column in range(int(i_s_nc), int(x_end_biggest_column)):

@@ -2078,52 +2095,50 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 if len(y_in_cols)>0:
                                     y_down=np.min(y_in_cols)
                                 else:
-                                    y_down=[int(splitter_y_new[i+1])][0]
+                                    y_down=splitter_y_new[i+1]
                                 #print(y_itself,'y_itself')
                                 boxes.append([peaks_neg_tot[column],
                                               peaks_neg_tot[column+1],
                                               y_itself,
                                               y_down])
             except:
+                logger.exception("cannot assign boxes")
                 boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
-                              int(splitter_y_new[i]), int(splitter_y_new[i+1])])
+                              splitter_y_new[i], splitter_y_new[i+1]])
         else:
             y_lines_by_order=[]
             x_start_by_order=[]
             x_end_by_order=[]
             if len(x_starting)>0:
-                all_columns = np.arange(len(peaks_neg_tot)-1)
-                columns_covered_by_lines_covered_more_than_2col = []
+                columns_covered_by_lines_covered_more_than_2col = set()
                 for dj in range(len(x_starting)):
-                    if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns):
-                        pass
-                    else:
-                        columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \
-                            list(range(int(x_starting[dj]),int(x_ending[dj]) ))
-                columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col))
-                columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col))
+                    if set(range(x_starting[dj], x_ending[dj])) != all_columns:
+                        columns_covered_by_lines_covered_more_than_2col.update(
+                            range(x_starting[dj], x_ending[dj]))
+                columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col)

-                y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
-                ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + 1,
+                                                       dtype=int) * splitter_y_new[i])
+                ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                 ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                x_starting = np.append(x_starting, columns_not_covered)
-                x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)

                 if len(new_main_sep_y) > 0:
                     x_starting = np.append(x_starting, 0)
-                    x_ending = np.append(x_ending, len(peaks_neg_tot)-1)
+                    x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
                 else:
                     x_starting = np.append(x_starting, x_starting[0])
                     x_ending = np.append(x_ending, x_ending[0])
             else:
-                all_columns = np.arange(len(peaks_neg_tot)-1)
-                columns_not_covered = list(set(all_columns))
-                y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
-                ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                columns_not_covered = list(all_columns)
+                y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered),
+                                                       dtype=int) * splitter_y_new[i])
+                ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                 ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                x_starting = np.append(x_starting, columns_not_covered)
-                x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)

-            ind_args=np.array(range(len(y_type_2)))
+            ind_args = np.arange(len(y_type_2))

             for column in range(len(peaks_neg_tot)-1):
                 #print(column,'column')

@@ -2155,7 +2170,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                 x_start_itself=x_start_copy.pop(il)
                 x_end_itself=x_end_copy.pop(il)

-                for column in range(int(x_start_itself), int(x_end_itself)+1):
+                for column in range(x_start_itself, x_end_itself+1):
                     #print(column,'cols')
                     y_in_cols=[]
                     for yic in range(len(y_copy)):

@@ -2169,7 +2184,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                     if len(y_in_cols)>0:
                         y_down=np.min(y_in_cols)
                     else:
-                        y_down=[int(splitter_y_new[i+1])][0]
+                        y_down=splitter_y_new[i+1]
                     #print(y_itself,'y_itself')
                     boxes.append([peaks_neg_tot[column],
                                   peaks_neg_tot[column+1],

@@ -2191,9 +2206,10 @@ def return_boxes_of_images_by_order_of_reading_new(
             x_end_new = regions_without_separators.shape[1] - boxes[i][0]
             boxes[i][0] = x_start_new
             boxes[i][1] = x_end_new
-        return boxes, peaks_neg_tot_tables_new
-    else:
-        return boxes, peaks_neg_tot_tables
+        peaks_neg_tot_tables = peaks_neg_tot_tables_new

+    logger.debug('exit return_boxes_of_images_by_order_of_reading_new')
+    return boxes, peaks_neg_tot_tables

 def is_image_filename(fname: str) -> bool:
     return fname.lower().endswith(('.jpg',
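The signature change in the hunks above follows a common optional-logger pattern (pass your own logger, or fall back to the package logger) and replaces bare `pass` in exception handlers with `logger.exception(...)`. A minimal standalone sketch of both ideas, with illustrative names (not eynollah code):

from logging import getLogger, basicConfig, DEBUG

def process(data, logger=None):
    if logger is None:
        logger = getLogger(__package__)      # fall back to the package logger
    logger.debug("enter process")
    try:
        return sum(data)
    except TypeError:
        logger.exception("cannot sum data")  # log instead of silently passing
        return 0

basicConfig(level=DEBUG)
print(process([1, 2, 3]))    # 6, via the default logger
print(process([1, "x"]))     # exception is logged, 0 is returned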


@ -1,7 +1,15 @@
from typing import Sequence, Union
from numbers import Number
from functools import partial from functools import partial
import itertools
import cv2 import cv2
import numpy as np import numpy as np
from shapely import geometry from scipy.sparse.csgraph import minimum_spanning_tree
from shapely.geometry import Polygon, LineString
from shapely.geometry.polygon import orient
from shapely import set_precision
from shapely.ops import unary_union, nearest_points
from .rotate import rotate_image, rotation_image_new from .rotate import rotate_image, rotation_image_new
@ -37,29 +45,28 @@ def get_text_region_boxes_by_given_contours(contours):
return boxes, contours_new return boxes, contours_new
def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
found_polygons_early = [] found_polygons_early = []
for jv,c in enumerate(contours): for jv, contour in enumerate(contours):
if len(c) < 3: # A polygon cannot have less than 3 points if len(contour) < 3: # A polygon cannot have less than 3 points
continue continue
polygon = geometry.Polygon([point[0] for point in c]) polygon = contour2polygon(contour, dilate=dilate)
area = polygon.area area = polygon.area
if (area >= min_area * np.prod(image.shape[:2]) and if (area >= min_area * np.prod(image.shape[:2]) and
area <= max_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and
hierarchy[0][jv][3] == -1): hierarchy[0][jv][3] == -1):
found_polygons_early.append(np.array([[point] found_polygons_early.append(polygon2contour(polygon))
for point in polygon.exterior.coords], dtype=np.uint))
return found_polygons_early return found_polygons_early
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
found_polygons_early = [] found_polygons_early = []
for jv,c in enumerate(contours): for jv, contour in enumerate(contours):
if len(c) < 3: # A polygon cannot have less than 3 points if len(contour) < 3: # A polygon cannot have less than 3 points
continue continue
polygon = geometry.Polygon([point[0] for point in c]) polygon = contour2polygon(contour, dilate=dilate)
# area = cv2.contourArea(c) # area = cv2.contourArea(contour)
area = polygon.area area = polygon.area
##print(np.prod(thresh.shape[:2])) ##print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area # Check that polygon has area greater than minimal area
@ -68,9 +75,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
area <= max_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and
# hierarchy[0][jv][3]==-1 # hierarchy[0][jv][3]==-1
True): True):
# print(c[0][0][1]) # print(contour[0][0][1])
found_polygons_early.append(np.array([[point] found_polygons_early.append(polygon2contour(polygon))
for point in polygon.exterior.coords], dtype=np.int32))
return found_polygons_early return found_polygons_early
def find_new_features_of_contours(contours_main): def find_new_features_of_contours(contours_main):
@ -135,12 +141,12 @@ def return_parent_contours(contours, hierarchy):
if hierarchy[0][i][3] == -1] if hierarchy[0][i][3] == -1]
return contours_parent return contours_parent
def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -247,30 +253,26 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
return cont_int[0], index_r_con, confidence_contour return cont_int[0], index_r_con, confidence_contour
def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map): def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
if not len(cnts): if not len(cnts):
return [], [] return [], []
confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
#cnts = cnts/2
cnts = [(i/6).astype(int) for i in cnts]
results = map(partial(do_back_rotation_and_get_cnt_back,
img=img,
slope_first=slope_first,
confidence_matrix=confidence_matrix,
),
cnts, range(len(cnts)))
contours, indexes, conf_contours = tuple(zip(*results))
return [i*6 for i in contours], list(conf_contours)
def return_contours_of_interested_textline(region_pre_p, pixel): confidence_matrix = cv2.resize(confidence_matrix,
(img.shape[1] // 6, img.shape[0] // 6),
interpolation=cv2.INTER_NEAREST)
confs = []
for cnt in cnts:
cnt_mask = np.zeros(confidence_matrix.shape)
cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0)
confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
return cnts, confs
def return_contours_of_interested_textline(region_pre_p, label):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -293,12 +295,12 @@ def return_contours_of_image(image):
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return contours, hierarchy return contours, hierarchy
def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -311,12 +313,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si
return contours_imgs return contours_imgs
def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -332,3 +334,97 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
return img_ret[:, :, 0] return img_ret[:, :, 0]
def dilate_textline_contours(all_found_textline_polygons):
return [[polygon2contour(contour2polygon(contour, dilate=6))
for contour in region]
for region in all_found_textline_polygons]
def dilate_textregion_contours(all_found_textline_polygons):
return [polygon2contour(contour2polygon(contour, dilate=6))
for contour in all_found_textline_polygons]
def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
polygon = Polygon([point[0] for point in contour])
if dilate:
polygon = polygon.buffer(dilate)
if polygon.geom_type == 'GeometryCollection':
# heterogeneous result: filter zero-area shapes (LineString, Point)
polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0])
if polygon.geom_type == 'MultiPolygon':
# homogeneous result: construct convex hull to connect
polygon = join_polygons(polygon.geoms)
return make_valid(polygon)
def polygon2contour(polygon: Polygon) -> np.ndarray:
polygon = np.array(polygon.exterior.coords[:-1], dtype=int)
return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis]
def make_valid(polygon: Polygon) -> Polygon:
"""Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
def isint(x):
return isinstance(x, int) or int(x) == x
# make sure rounding does not invalidate
if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0:
polygon = Polygon(np.round(polygon.exterior.coords))
points = list(polygon.exterior.coords[:-1])
# try by re-arranging points
for split in range(1, len(points)):
if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
break
# simplification may not be possible (at all) due to ordering
# in that case, try another starting point
polygon = Polygon(points[-split:]+points[:-split])
# try by simplification
for tolerance in range(int(polygon.area + 1.5)):
if polygon.is_valid:
break
# simplification may require a larger tolerance
polygon = polygon.simplify(tolerance + 1)
# try by enlarging
for tolerance in range(1, int(polygon.area + 2.5)):
if polygon.is_valid:
break
# enlargement may require a larger tolerance
polygon = polygon.buffer(tolerance)
assert polygon.is_valid, polygon.wkt
return polygon
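Editor's note (not part of the diff): a sketch of what make_valid() guarantees, using a self-intersecting "bowtie" as input (assumes shapely >= 2; the example polygon is illustrative):

from shapely.geometry import Polygon

bowtie = Polygon([(0, 0), (4, 4), (4, 0), (0, 4)])  # edges cross, so invalid
assert not bowtie.is_valid
repaired = make_valid(bowtie)  # rearrange/simplify/enlarge until valid
assert repaired.is_valid       # guaranteed by the assert inside make_valid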
def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon:
"""construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
# ensure input polygons are simply typed and all oriented equally
polygons = [orient(poly)
for poly in itertools.chain.from_iterable(
[poly.geoms
if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
else [poly]
for poly in polygons])]
npoly = len(polygons)
if npoly == 1:
return polygons[0]
    # find minimum-weight set of bridges connecting all polygons (minimum spanning tree)
pairs = itertools.combinations(range(npoly), 2)
dists = np.zeros((npoly, npoly), dtype=float)
for i, j in pairs:
dist = polygons[i].distance(polygons[j])
if dist < 1e-5:
dist = 1e-5 # if pair merely touches, we still need to get an edge
dists[i, j] = dist
dists[j, i] = dist
dists = minimum_spanning_tree(dists, overwrite=True)
# add bridge polygons (where necessary)
for prevp, nextp in zip(*dists.nonzero()):
prevp = polygons[prevp]
nextp = polygons[nextp]
nearest = nearest_points(prevp, nextp)
bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1)
polygons.append(bridgep)
jointp = unary_union(polygons)
assert jointp.geom_type == 'Polygon', jointp.wkt
# follow-up calculations will necessarily be integer;
# so anticipate rounding here and then ensure validity
jointp2 = set_precision(jointp, 1.0)
if jointp2.geom_type != 'Polygon' or not jointp2.is_valid:
jointp2 = Polygon(np.round(jointp.exterior.coords))
jointp2 = make_valid(jointp2)
assert jointp2.geom_type == 'Polygon', jointp2.wkt
return jointp2
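Editor's note (not part of the diff): joining two disjoint boxes; the spanning-tree step adds one bridge between their nearest points so the union comes out as a single valid Polygon (inputs are illustrative):

from shapely.geometry import box

parts = [box(0, 0, 10, 10), box(30, 0, 40, 10)]  # 20px gap between the boxes
joint = join_polygons(parts, scale=20)
assert joint.geom_type == 'Polygon'  # one concave hull covering both parts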

View file

@ -99,6 +99,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
except: except:
point_left=first_nonzero point_left=first_nonzero
if point_left == first_nonzero and point_right == last_nonzero:
return text_regions
if point_right>=mask_marginals.shape[1]: if point_right>=mask_marginals.shape[1]:

View file

@ -17,9 +17,12 @@ from .contour import (
return_contours_of_interested_textline, return_contours_of_interested_textline,
find_contours_mean_y_diff, find_contours_mean_y_diff,
) )
from .shm import share_ndarray, wrap_ndarray_shared
from . import ( from . import (
find_num_col_deskew, find_num_col_deskew,
crop_image_inside_box, crop_image_inside_box,
box2rect,
box2slice,
) )
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@ -64,7 +67,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -75,11 +79,14 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
clusters_to_be_deleted = [] clusters_to_be_deleted = []
if len(arg_diff_cluster) > 0: if len(arg_diff_cluster) > 0:
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) clusters_to_be_deleted.append(
arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
for i in range(len(arg_diff_cluster) - 1): for i in range(len(arg_diff_cluster) - 1):
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : clusters_to_be_deleted.append(
arg_diff_cluster[i + 1] + 1]) arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) arg_diff_cluster[i + 1] + 1])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
if len(clusters_to_be_deleted) > 0: if len(clusters_to_be_deleted) > 0:
peaks_new_extra = [] peaks_new_extra = []
for m in range(len(clusters_to_be_deleted)): for m in range(len(clusters_to_be_deleted)):
@ -176,7 +183,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -236,7 +244,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
try: try:
neg_peaks_max=np.max(y_padded_smoothed[peaks]) neg_peaks_max=np.max(y_padded_smoothed[peaks])
arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42]
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -313,23 +322,36 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) point_down =y_max_cont-1
##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else: else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) point_down =y_max_cont-1
##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int( point_down_narrow = peaks[jj] + first_nonzero + int(
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) 1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
else: else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else: else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int( point_down_narrow = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
@ -338,7 +360,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_down_narrow = img_patch.shape[0] - 2 point_down_narrow = img_patch.shape[0] - 2
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -465,7 +489,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -540,7 +565,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down) point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -610,7 +636,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg]) neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg])
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -686,30 +713,50 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1
##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else: else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1
##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
#point_up
# np.max(y_cont)
#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
else: else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else: else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
if point_down_narrow >= img_patch.shape[0]: if point_down_narrow >= img_patch.shape[0]:
point_down_narrow = img_patch.shape[0] - 2 point_down_narrow = img_patch.shape[0] - 2
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
xvinside = xv[distances >= 0] xvinside = xv[distances >= 0]
@ -798,7 +845,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next) point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -863,7 +911,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down) point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -947,7 +996,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -960,8 +1010,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
if len(arg_diff_cluster) > 0: if len(arg_diff_cluster) > 0:
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
for i in range(len(arg_diff_cluster) - 1): for i in range(len(arg_diff_cluster) - 1):
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:
arg_diff_cluster[i + 1] + 1])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
if len(clusters_to_be_deleted) > 0: if len(clusters_to_be_deleted) > 0:
peaks_new_extra = [] peaks_new_extra = []
for m in range(len(clusters_to_be_deleted)): for m in range(len(clusters_to_be_deleted)):
@ -1011,7 +1064,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
try: try:
neg_peaks_max = np.max(y_padded_smoothed[peaks]) neg_peaks_max = np.max(y_padded_smoothed[peaks])
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -1287,7 +1341,9 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
return None, cont_final return None, cont_final
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): def textline_contours_postprocessing(textline_mask, slope,
contour_text_interest, box_ind,
add_boxes_coor_into_textlines=False):
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
textline_mask = textline_mask.astype(np.uint8) textline_mask = textline_mask.astype(np.uint8)
kernel = np.ones((5, 5), np.uint8) kernel = np.ones((5, 5), np.uint8)
@ -1347,24 +1403,26 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
return contours_rotated_clean return contours_rotated_clean
def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None):
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
if not np.prod(img_crop.shape):
return img_crop
if num_col == 1: if num_col == 1:
num_patches = int(img_path.shape[1] / 200.0) num_patches = int(img_crop.shape[1] / 200.0)
else: else:
num_patches = int(img_path.shape[1] / 140.0) num_patches = int(img_crop.shape[1] / 140.0)
# num_patches=int(img_path.shape[1]/200.) # num_patches=int(img_crop.shape[1]/200.)
if num_patches == 0: if num_patches == 0:
num_patches = 1 num_patches = 1
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
# plt.imshow(img_patch_ineterst) # plt.imshow(img_patch_interest)
# plt.show() # plt.show()
length_x = int(img_path.shape[1] / float(num_patches)) length_x = int(img_crop.shape[1] / float(num_patches))
    # margin = int(0.04 * length_x) just recently this was changed because it breaks lines into 2    # margin = int(0.04 * length_x) just recently this was changed because it breaks lines into 2
margin = int(0.04 * length_x) margin = int(0.04 * length_x)
# if margin<=4: # if margin<=4:
@ -1372,7 +1430,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
# margin=0 # margin=0
width_mid = length_x - 2 * margin width_mid = length_x - 2 * margin
nxf = img_path.shape[1] / float(width_mid) nxf = img_crop.shape[1] / float(width_mid)
if nxf > int(nxf): if nxf > int(nxf):
nxf = int(nxf) + 1 nxf = int(nxf) + 1
@ -1388,12 +1446,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
index_x_d = i * width_mid index_x_d = i * width_mid
index_x_u = index_x_d + length_x index_x_u = index_x_d + length_x
if index_x_u > img_path.shape[1]: if index_x_u > img_crop.shape[1]:
index_x_u = img_path.shape[1] index_x_u = img_crop.shape[1]
index_x_d = img_path.shape[1] - length_x index_x_d = img_crop.shape[1] - length_x
# img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
img_xline = img_patch_ineterst[:, index_x_d:index_x_u] img_xline = img_patch_interest[:, index_x_d:index_x_u]
try: try:
assert img_xline.any() assert img_xline.any()
@ -1409,9 +1467,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
img_line_rotated = rotate_image(img_xline, slope_xline) img_line_rotated = rotate_image(img_xline, slope_xline)
img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) img_patch_interest_revised = np.zeros(img_patch_interest.shape)
for i in range(nxf): for i in range(nxf):
if i == 0: if i == 0:
@ -1421,11 +1479,11 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
index_x_d = i * width_mid index_x_d = i * width_mid
index_x_u = index_x_d + length_x index_x_u = index_x_d + length_x
if index_x_u > img_path.shape[1]: if index_x_u > img_crop.shape[1]:
index_x_u = img_path.shape[1] index_x_u = img_crop.shape[1]
index_x_d = img_path.shape[1] - length_x index_x_d = img_crop.shape[1] - length_x
img_xline = img_patch_ineterst[:, index_x_d:index_x_u] img_xline = img_patch_interest[:, index_x_d:index_x_u]
img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0]
@ -1448,11 +1506,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]]
img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
return img_patch_ineterst_revised return img_patch_interest_revised
def do_image_rotation(angle, img, sigma_des, logger=None): @wrap_ndarray_shared(kw='img')
def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None):
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
img_rot = rotate_image(img, angle) img_rot = rotate_image(img, angle)
@ -1465,7 +1524,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None):
return var return var
def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
main_page=False, logger=None, plotter=None, map=map): main_page=False, logger=None, plotter=None, map=None):
if main_page and plotter: if main_page and plotter:
plotter.save_plot_of_textline_density(img_patch_org) plotter.save_plot_of_textline_density(img_patch_org)
@ -1479,159 +1538,75 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
#img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
#img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0],
# int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
angles = np.array([-45, 0, 45, 90,]) angles = np.array([-45, 0, 45, 90,])
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
elif main_page: elif main_page:
angles = np.array (list(np.linspace(-12, -7, int(n_tot_angles/4))) + list(np.linspace(-6, 6, n_tot_angles- 2* int(n_tot_angles/4))) + list(np.linspace(7, 12, int(n_tot_angles/4))))#np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) #angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4),
np.linspace(-6, 6, n_tot_angles // 2),
np.linspace(7, 12, n_tot_angles // 4)))
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
early_slope_edge=11 early_slope_edge=11
if abs(angle) > early_slope_edge: if abs(angle) > early_slope_edge:
if angle < 0: if angle < 0:
angles = np.linspace(-90, -12, n_tot_angles) angles2 = np.linspace(-90, -12, n_tot_angles)
else: else:
angles = np.linspace(90, 12, n_tot_angles) angles2 = np.linspace(90, 12, n_tot_angles)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
if var2 > var:
angle = angle2
else: else:
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
early_slope_edge=22 early_slope_edge=22
if abs(angle) > early_slope_edge: if abs(angle) > early_slope_edge:
if angle < 0: if angle < 0:
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
else: else:
angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
if var2 > var:
angle = angle2
return angle return angle
def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map):
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles)) if map is None:
results = [do_image_rotation.__wrapped__(angle, img=img, sigma_des=sigma_des, logger=logger)
for angle in angles]
else:
with share_ndarray(img) as img_shared:
results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=None),
angles))
if plotter: if plotter:
plotter.save_plot_of_rotation_angle(angles, results) plotter.save_plot_of_rotation_angle(angles, results)
try: try:
var_res = np.array(results) var_res = np.array(results)
assert var_res.any() assert var_res.any()
angle = angles[np.argmax(var_res)] idx = np.argmax(var_res)
angle = angles[idx]
var = var_res[idx]
except: except:
logger.exception("cannot determine best angle among %s", str(angles)) logger.exception("cannot determine best angle among %s", str(angles))
angle = 0 angle = 0
return angle var = 0
return angle, var
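Editor's note (not part of the diff): get_smallest_skew() now returns an (angle, variance) pair, so a caller that widens the search range after a large first estimate can keep whichever range scored best; do_image_rotation() stays reachable in-process via __wrapped__, or through a pool with a shared-memory descriptor. A sequential sketch (img is an illustrative placeholder for a binarized 2D array):

angle, var = get_smallest_skew(img, 1.0, np.linspace(-25, 25, 60), map=None)
if abs(angle) > 22:  # large skew: re-search the outer range
    angles2 = np.linspace(-90, -25, 60) if angle < 0 else np.linspace(90, 25, 60)
    angle2, var2 = get_smallest_skew(img, 1.0, angles2, map=None)
    if var2 > var:   # keep the overall best result across both ranges
        angle = angle2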
def return_deskew_slop_old_mp(img_patch_org, sigma_des,n_tot_angles=100,
main_page=False, logger=None, plotter=None):
if main_page and plotter:
plotter.save_plot_of_textline_density(img_patch_org)
img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
max_shape=np.max(img_int.shape)
img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) ))
onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.)
onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
angles = np.array([-45, 0, 45, 90,])
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
elif main_page:
angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
early_slope_edge=11
if abs(angle) > early_slope_edge:
if angle < 0:
angles = np.linspace(-90, -12, n_tot_angles)
else:
angles = np.linspace(90, 12, n_tot_angles)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
else:
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
early_slope_edge=22
if abs(angle) > early_slope_edge:
if angle < 0:
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
else:
angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
return angle
def do_image_rotation_omp(queue_of_all_params,angles_per_process, img_resized, sigma_des):
vars_per_each_subprocess = []
angles_per_each_subprocess = []
for mv in range(len(angles_per_process)):
img_rot=rotate_image(img_resized,angles_per_process[mv])
img_rot[img_rot!=0]=1
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
except:
var_spectrum=0
vars_per_each_subprocess.append(var_spectrum)
angles_per_each_subprocess.append(angles_per_process[mv])
queue_of_all_params.put([vars_per_each_subprocess, angles_per_each_subprocess])
def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None):
num_cores = cpu_count()
queue_of_all_params = Queue()
processes = []
nh = np.linspace(0, len(angles), num_cores + 1)
for i in range(num_cores):
angles_per_process = angles[int(nh[i]) : int(nh[i + 1])]
processes.append(Process(target=do_image_rotation_omp, args=(queue_of_all_params, angles_per_process, img_resized, sigma_des)))
for i in range(num_cores):
processes[i].start()
var_res=[]
all_angles = []
for i in range(num_cores):
list_all_par = queue_of_all_params.get(True)
vars_for_subprocess = list_all_par[0]
angles_sub_process = list_all_par[1]
for j in range(len(vars_for_subprocess)):
var_res.append(vars_for_subprocess[j])
all_angles.append(angles_sub_process[j])
for i in range(num_cores):
processes[i].join()
if plotter:
plotter.save_plot_of_rotation_angle(all_angles, var_res)
try:
var_res=np.array(var_res)
ang_int=all_angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
return ang_int
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new( def do_work_of_slopes_new(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_mask_tot_ea=None, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
): ):
if KERNEL is None: if KERNEL is None:
@ -1641,7 +1616,7 @@ def do_work_of_slopes_new(
logger.debug('enter do_work_of_slopes_new') logger.debug('enter do_work_of_slopes_new')
x, y, w, h = box_text x, y, w, h = box_text
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated) crop_coor = box2rect(box_text)
mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = np.zeros(textline_mask_tot_ea.shape)
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
all_text_region_raw = textline_mask_tot_ea * mask_textline all_text_region_raw = textline_mask_tot_ea * mask_textline
@ -1649,7 +1624,7 @@ def do_work_of_slopes_new(
img_int_p = all_text_region_raw[:,:] img_int_p = all_text_region_raw[:,:]
img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2)
if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
slope = 0 slope = 0
slope_for_all = slope_deskew slope_for_all = slope_deskew
all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w]
@ -1687,9 +1662,12 @@ def do_work_of_slopes_new(
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
@wrap_ndarray_shared(kw='mask_texts_only')
def do_work_of_slopes_new_curved( def do_work_of_slopes_new_curved(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, textline_mask_tot_ea=None, mask_texts_only=None,
num_col=1, scale_par=1.0, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
): ):
if KERNEL is None: if KERNEL is None:
@ -1706,7 +1684,7 @@ def do_work_of_slopes_new_curved(
# plt.imshow(img_int_p) # plt.imshow(img_int_p)
# plt.show() # plt.show()
if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
slope = 0 slope = 0
slope_for_all = slope_deskew slope_for_all = slope_deskew
else: else:
@ -1732,7 +1710,7 @@ def do_work_of_slopes_new_curved(
slope_for_all = slope_deskew slope_for_all = slope_deskew
slope = slope_for_all slope = slope_for_all
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated) crop_coor = box2rect(box_text)
if abs(slope_for_all) < 45: if abs(slope_for_all) < 45:
textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
@ -1765,20 +1743,25 @@ def do_work_of_slopes_new_curved(
mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4)
pixel_img = 1 pixel_img = 1
mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) mask_biggest2 = resize_image(mask_biggest2,
int(mask_biggest2.shape[0] * scale_par),
int(mask_biggest2.shape[1] * scale_par))
cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img)
try: try:
textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0])
except Exception as why: except Exception as why:
logger.error(why) logger.error(why)
else: else:
textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw,
slope_for_all, contour_par,
box_text, True)
return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new_light( def do_work_of_slopes_new_light(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, textline_mask_tot_ea=None, slope_deskew=0, textline_light=True,
logger=None logger=None
): ):
if logger is None: if logger is None:
@ -1786,7 +1769,7 @@ def do_work_of_slopes_new_light(
logger.debug('enter do_work_of_slopes_new_light') logger.debug('enter do_work_of_slopes_new_light')
x, y, w, h = box_text x, y, w, h = box_text
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated) crop_coor = box2rect(box_text)
mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = np.zeros(textline_mask_tot_ea.shape)
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
all_text_region_raw = textline_mask_tot_ea * mask_textline all_text_region_raw = textline_mask_tot_ea * mask_textline

45
src/eynollah/utils/shm.py Normal file
View file

@ -0,0 +1,45 @@
from multiprocessing import shared_memory
from contextlib import contextmanager
from functools import wraps
import numpy as np
@contextmanager
def share_ndarray(array: np.ndarray):
size = np.dtype(array.dtype).itemsize * np.prod(array.shape)
shm = shared_memory.SharedMemory(create=True, size=size)
try:
shared_array = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
shared_array[:] = array[:]
shared_array.flags["WRITEABLE"] = False
yield dict(shape=array.shape, dtype=array.dtype, name=shm.name)
finally:
shm.close()
shm.unlink()
@contextmanager
def ndarray_shared(array: dict):
shm = shared_memory.SharedMemory(name=array['name'])
try:
array = np.ndarray(array['shape'], dtype=array['dtype'], buffer=shm.buf)
yield array
finally:
shm.close()
def wrap_ndarray_shared(kw=None):
def wrapper(f):
if kw is None:
@wraps(f)
def shared_func(array, *args, **kwargs):
with ndarray_shared(array) as ndarray:
return f(ndarray, *args, **kwargs)
return shared_func
else:
@wraps(f)
def shared_func(*args, **kwargs):
array = kwargs.pop(kw)
with ndarray_shared(array) as ndarray:
kwargs[kw] = ndarray
return f(*args, **kwargs)
return shared_func
return wrapper
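Editor's note (not part of the diff): a minimal parent/worker round trip with the new helpers; column_sum and the toy array are hypothetical, only share_ndarray and wrap_ndarray_shared come from this file:

from concurrent.futures import ProcessPoolExecutor
from functools import partial
import numpy as np

@wrap_ndarray_shared(kw='img')
def column_sum(idx, img=None):
    # in the worker, 'img' arrives as a descriptor dict and is re-attached here
    return float(img[:, idx].sum())

if __name__ == '__main__':
    data = np.arange(12, dtype=float).reshape(3, 4)
    with share_ndarray(data) as shared:      # shared is dict(shape, dtype, name)
        with ProcessPoolExecutor(2) as pool:
            sums = list(pool.map(partial(column_sum, img=shared), range(4)))
    assert sums == [12.0, 15.0, 18.0, 21.0]  # per-column sums of data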

View file

@ -92,6 +92,7 @@ def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(t
return peaks_final return peaks_final
else: else:
return None return None
# Function to fit text inside the given area # Function to fit text inside the given area
def fit_text_single_line(draw, text, font_path, max_width, max_height): def fit_text_single_line(draw, text, font_path, max_width, max_height):
initial_font_size = 50 initial_font_size = 50
@ -369,7 +370,11 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind
return textline_contour return textline_contour
def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False): def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons,
prediction_model,
b_s_ocr, num_to_char,
textline_light=False,
curved_line=False):
max_len = 512 max_len = 512
padding_token = 299 padding_token = 299
image_width = 512#max_len * 4 image_width = 512#max_len * 4
@ -425,17 +430,23 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
if splited_images: if splited_images:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0],
image_height,
image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(1) cropped_lines_meging_indexing.append(1)
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1],
image_height,
image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(-1) cropped_lines_meging_indexing.append(-1)
else: else:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop,
image_height,
image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(0) cropped_lines_meging_indexing.append(0)
@ -468,7 +479,12 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
pred_texts_ib = pred_texts[ib].replace("[UNK]", "") pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
extracted_texts.append(pred_texts_ib) extracted_texts.append(pred_texts_ib)
extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [extracted_texts[ind]
if cropped_lines_meging_indexing[ind]==0
else extracted_texts[ind]+" "+extracted_texts[ind+1]
if cropped_lines_meging_indexing[ind]==1
else None
for ind in range(len(cropped_lines_meging_indexing))]
extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
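Editor's note (not part of the diff): how the merging indexer above recombines split textlines, with 0 = standalone, 1 = first half (merge with next), -1 = second half (dropped after merging); toy values:

texts = ['foo', 'bar', 'baz']
merge_idx = [0, 1, -1]
merged = [texts[i] if merge_idx[i] == 0
          else texts[i] + " " + texts[i + 1] if merge_idx[i] == 1
          else None
          for i in range(len(merge_idx))]
merged = [t for t in merged if t is not None]
assert merged == ['foo', 'bar baz']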

View file

@ -289,7 +289,7 @@ class EynollahXmlWriter():
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
for mm in range(len(found_polygons_text_region_h)): for mm in range(len(found_polygons_text_region_h)):
textregion = TextRegionType(id=counter.next_region_id, type_='header', textregion = TextRegionType(id=counter.next_region_id, type_='heading',
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
page.add_TextRegion(textregion) page.add_TextRegion(textregion)
@ -335,7 +335,7 @@ class EynollahXmlWriter():
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
for mm in range(len(polygons_lines_to_be_written_in_xml)): for mm in range(len(polygons_lines_to_be_written_in_xml)):
page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
for mm in range(len(found_polygons_tables)): for mm in range(len(found_polygons_tables)):
page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))

View file

@ -20,23 +20,9 @@ MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve())) MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve())) MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') "options",
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' [
args = [
'-m', MODELS_LAYOUT,
'-i', str(infile),
'-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO)
def only_eynollah(logrec):
return logrec.name == 'eynollah'
runner = CliRunner()
for options in [
[], # defaults [], # defaults
["--allow_scaling", "--curved-line"], ["--allow_scaling", "--curved-line"],
["--allow_scaling", "--curved-line", "--full-layout"], ["--allow_scaling", "--curved-line", "--full-layout"],
@ -47,22 +33,34 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
# -eoi ... # -eoi ...
# --do_ocr # --do_ocr
# --skip_layout_and_reading_order # --skip_layout_and_reading_order
]: ], ids=str)
with subtests.test(#msg="test CLI", def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
options=options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
with caplog.filtering(only_eynollah): outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
result = runner.invoke(layout_cli, args + options, catch_exceptions=False) args = [
assert result.exit_code == 0, result.stdout '-m', MODELS_LAYOUT,
logmsgs = [logrec.message for logrec in caplog.records] '-i', str(infile),
assert str(infile) in logmsgs '-o', str(outfile.parent),
assert outfile.exists() ]
tree = page_from_file(str(outfile)).etree if pytestconfig.getoption('verbose') > 0:
regions = tree.xpath("//page:TextRegion", namespaces=NS) args.extend(['-l', 'DEBUG'])
assert len(regions) >= 2, "result is inaccurate" caplog.set_level(logging.INFO)
regions = tree.xpath("//page:SeparatorRegion", namespaces=NS) def only_eynollah(logrec):
assert len(regions) >= 2, "result is inaccurate" return logrec.name == 'eynollah'
lines = tree.xpath("//page:TextLine", namespaces=NS) runner = CliRunner()
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line with caplog.filtering(only_eynollah):
result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
assert str(infile) in logmsgs
assert outfile.exists()
tree = page_from_file(str(outfile)).etree
regions = tree.xpath("//page:TextRegion", namespaces=NS)
assert len(regions) >= 2, "result is inaccurate"
regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
assert len(regions) >= 2, "result is inaccurate"
lines = tree.xpath("//page:TextLine", namespaces=NS)
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
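Editor's note (not part of the diff): the shape of the parametrize conversion above, reduced to a toy case; each option list becomes an isolated test invocation, and ids=str gives it a readable label:

import pytest

@pytest.mark.parametrize("options", [[], ["--flag"]], ids=str)
def test_demo(options):
    assert isinstance(options, list)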
def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
@ -86,7 +84,13 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in')) assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in'))
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
"options",
[
[], # defaults
["--no-patches"],
], ids=str)
def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
args = [ args = [
@ -100,25 +104,19 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
def only_eynollah(logrec): def only_eynollah(logrec):
return logrec.name == 'SbbBinarizer' return logrec.name == 'SbbBinarizer'
runner = CliRunner() runner = CliRunner()
for options in [ with caplog.filtering(only_eynollah):
[], # defaults result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
["--no-patches"], assert result.exit_code == 0, result.stdout
]: logmsgs = [logrec.message for logrec in caplog.records]
with subtests.test(#msg="test CLI", assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
options=options): assert outfile.exists()
with caplog.filtering(only_eynollah): with Image.open(infile) as original_img:
result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) original_size = original_img.size
assert result.exit_code == 0, result.stdout with Image.open(outfile) as binarized_img:
logmsgs = [logrec.message for logrec in caplog.records] binarized_size = binarized_img.size
assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) assert original_size == binarized_size
assert outfile.exists()
with Image.open(infile) as original_img:
original_size = original_img.size
with Image.open(outfile) as binarized_img:
binarized_size = binarized_img.size
assert original_size == binarized_size
def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog): def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
outdir = tmp_path outdir = tmp_path
args = [ args = [
@ -139,15 +137,19 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
"options",
[
[], # defaults
["-sos"],
], ids=str)
def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
args = [ args = [
'-m', MODELS_LAYOUT, '-m', MODELS_LAYOUT,
'-i', str(infile), '-i', str(infile),
'-o', str(outfile.parent), '-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
] ]
if pytestconfig.getoption('verbose') > 0: if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG']) args.extend(['-l', 'DEBUG'])
@@ -155,25 +157,19 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog):
     def only_eynollah(logrec):
         return logrec.name == 'enhancement'
     runner = CliRunner()
-    for options in [
-        [], # defaults
-        ["-sos"],
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in caplog.records]
-            assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
-            assert outfile.exists()
-            with Image.open(infile) as original_img:
-                original_size = original_img.size
-            with Image.open(outfile) as enhanced_img:
-                enhanced_size = enhanced_img.size
-            assert (original_size == enhanced_size) == ("-sos" in options)
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
+    assert outfile.exists()
+    with Image.open(infile) as original_img:
+        original_size = original_img.size
+    with Image.open(outfile) as enhanced_img:
+        enhanced_size = enhanced_img.size
+    assert (original_size == enhanced_size) == ("-sos" in options)
 
-def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
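Note the shape of the closing assertion in the filename test above: `assert boolA == boolB` encodes "exactly when", i.e. the enhanced image keeps the original dimensions if and only if `-sos` was passed. A tiny self-contained illustration of the idiom:

```python
def check_size(original_size, enhanced_size, options):
    # size must be preserved exactly when -sos was requested
    assert (original_size == enhanced_size) == ("-sos" in options)

check_size((1000, 1600), (1000, 1600), ["-sos"])  # preserved with flag: ok
check_size((1000, 1600), (2000, 3200), [])        # changed without flag: ok
```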
@@ -194,7 +190,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
     assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2
     assert len(list(outdir.iterdir())) == 2
 
-def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
     args = [
@@ -223,7 +219,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
     #assert in_order != out_order
     assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']
 
-def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
@@ -245,7 +241,15 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
     #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
     assert len(list(outdir.iterdir())) == 2
 
-def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
+@pytest.mark.parametrize(
+    "options",
+    [
+        [], # defaults
+        ["-doit", #str(outrenderfile.parent)],
+        ],
+        ["-trocr"],
+    ], ids=str)
+def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
     outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
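A subtle detail in the new parametrize list: in `["-doit", #str(outrenderfile.parent)],` everything from `#` on is a comment, including the trailing `],`, so the entry is really `["-doit",` and is only closed by the bare `],` on the next line. That is intentional: `outrenderfile` derives from the per-test `tmp_path` fixture, which does not exist at decorator (collection) time, so the path argument is spliced in at runtime instead (the `options.insert(...)` visible two hunks below). A sketch of the technique:

```python
import pytest

@pytest.mark.parametrize("options", [
    [],         # defaults
    ["-doit"],  # render dir cannot be named here: tmp_path exists only per item
], ids=str)
def test_sketch(tmp_path, options):
    if "-doit" in options:
        # splice the fixture-dependent argument in at runtime
        options.insert(options.index("-doit") + 1, str(tmp_path / "render"))
    assert options in ([], ["-doit", str(tmp_path / "render")])
```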
@@ -255,8 +259,6 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
         '-i', str(infile),
         '-dx', str(infile.parent),
         '-o', str(outfile.parent),
-        # subtests write to same location
-        '--overwrite',
     ]
     if pytestconfig.getoption('verbose') > 0:
         args.extend(['-l', 'DEBUG'])
@@ -264,33 +266,25 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
     def only_eynollah(logrec):
         return logrec.name == 'eynollah'
     runner = CliRunner()
-    for options in [
-        # kba Fri Sep 26 12:53:49 CEST 2025
-        # Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
-        # [], # defaults
-        # ["-doit", str(outrenderfile.parent)],
-        ["-trocr"],
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in caplog.records]
-            # FIXME: ocr has no logging!
-            #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
-            assert outfile.exists()
-            if "-doit" in options:
-                assert outrenderfile.exists()
-            #in_tree = page_from_file(str(infile)).etree
-            #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
-            out_tree = page_from_file(str(outfile)).etree
-            out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
-            assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
-            assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
+    if "-doit" in options:
+        options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    # FIXME: ocr has no logging!
+    #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
+    assert outfile.exists()
+    if "-doit" in options:
+        assert outrenderfile.exists()
+    #in_tree = page_from_file(str(infile)).etree
+    #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
+    out_tree = page_from_file(str(outfile)).etree
+    out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
+    assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
+    assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
 
-@pytest.mark.skip("Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged")
-def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
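The trailing assertions of the filename test query the PAGE-XML output: `page:TextEquiv[last()]` picks each line's most recently appended text result, so pre-existing TextEquiv entries in the input do not mask the new OCR text, and the `>= 2` lines / `> 100` characters thresholds are a coarse sanity check against empty or truncated output. A self-contained sketch of the same XPath (assuming the 2019-07-15 PAGE namespace used by OCR-D):

```python
from lxml import etree

NS = {"page": "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"}
xml = b"""<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15">
  <Page><TextRegion><TextLine>
    <TextEquiv><Unicode>previous result</Unicode></TextEquiv>
    <TextEquiv><Unicode>new OCR result</Unicode></TextEquiv>
  </TextLine></TextRegion></Page>
</PcGts>"""
tree = etree.fromstring(xml)
out_texts = tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()",
                       namespaces=NS)
assert out_texts == ["new OCR result"]  # last() skips the older entry
```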