diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml
index 042e508..466e690 100644
--- a/.github/workflows/test-eynollah.yml
+++ b/.github/workflows/test-eynollah.yml
@@ -24,24 +24,39 @@ jobs:
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
         df -h
       - uses: actions/checkout@v4
-      - uses: actions/cache@v4
+      - uses: actions/cache/restore@v4
         id: seg_model_cache
         with:
           path: models_layout_v0_5_0
-          key: ${{ runner.os }}-models
-      - uses: actions/cache@v4
+          key: seg-models
+      - uses: actions/cache/restore@v4
         id: ocr_model_cache
         with:
-          path: models_ocr_v0_5_0
-          key: ${{ runner.os }}-models
-      - uses: actions/cache@v4
+          path: models_ocr_v0_5_1
+          key: ocr-models
+      - uses: actions/cache/restore@v4
         id: bin_model_cache
         with:
           path: default-2021-03-09
-          key: ${{ runner.os }}-modelbin
+          key: bin-models
       - name: Download models
         if: steps.seg_model_cache.outputs.cache-hit != 'true' || steps.bin_model_cache.outputs.cache-hit != 'true' || steps.ocr_model_cache.outputs.cache-hit != 'true'
         run: make models
+      - uses: actions/cache/save@v4
+        if: steps.seg_model_cache.outputs.cache-hit != 'true'
+        with:
+          path: models_layout_v0_5_0
+          key: seg-models
+      - uses: actions/cache/save@v4
+        if: steps.ocr_model_cache.outputs.cache-hit != 'true'
+        with:
+          path: models_ocr_v0_5_1
+          key: ocr-models
+      - uses: actions/cache/save@v4
+        if: steps.bin_model_cache.outputs.cache-hit != 'true'
+        with:
+          path: default-2021-03-09
+          key: bin-models
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
         with:
@@ -50,7 +65,12 @@
         run: |
           python -m pip install --upgrade pip
           make install-dev EXTRAS=OCR,plotting
-          make deps-test
+          make deps-test EXTRAS=OCR,plotting
+          ls -l models_*
+      - name: Lint with ruff
+        uses: astral-sh/ruff-action@v3
+        with:
+          src: "./src"
       - name: Test with pytest
         run: make coverage PYTEST_ARGS="-vv --junitxml=pytest.xml"
       - name: Get coverage results
diff --git a/.gitignore b/.gitignore
index da03449..fd64f0b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,8 +2,12 @@
 __pycache__
 sbb_newspapers_org_image/pylint.log
 models_eynollah*
+models_ocr*
+models_layout*
+default-2021-03-09
 output.html
 /build
 /dist
 *.tif
 *.sw?
+TAGS
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8c6c000..10b3923 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,17 +5,123 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+Fixed:
+
+  * `join_polygons` always returning Polygon, not MultiPolygon, #203
+
+## [0.6.0rc2] - 2025-10-14
+
+Fixed:
+
+  * Prevent OOM GPU error by avoiding loading the `region_fl` model, #199
+  * XML output: encoding should be `utf-8`, not `utf8`, #196, #197
+
+## [0.6.0rc1] - 2025-10-10
+
+Fixed:
+
+  * continue processing when no columns detected but text regions exist
+  * convert marginalia to main text if no main text is present
+  * reset deskewing angle to 0° when text covers <30% image area and detected angle >45°
+  * :fire: polygons: avoid invalid paths (use `Polygon.buffer()` instead of dilation etc.)
+  * `return_boxes_of_images_by_order_of_reading_new`: avoid NumPy dtype mismatch, simplify
+  * `return_boxes_of_images_by_order_of_reading_new`: log any exceptions instead of ignoring
+  * `filter_contours_without_textline_inside`: avoid removing from duplicate lists twice
+  * `get_marginals`: exit early if no peaks found to avoid spurious overlap mask
+  * `get_smallest_skew`: after shifting search range of rotation angle, use overall best result
+  * Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR)
+  * OCR: re-instate missing methods and fix `utils_ocr` function calls
+  * mbreorder/enhancement CLIs: fix missing imports
+  * :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`), f458e3e
+  * tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate`
+    (so CUDA memory gets freed between tests if running on GPU)
+
+Added:
+
+  * :fire: `layout` CLI: new option `--model_version` to override default choices
+  * test coverage for OCR options in `layout`
+  * test coverage for table detection in `layout`
+  * CI linting with ruff
+
+Changed:
+
+  * polygons: slightly widen for regions and lines, increase for separators
+  * various refactorings, some code style and identifier improvements
+  * deskewing/multiprocessing: switch back to ProcessPoolExecutor (faster),
+    but use shared memory if necessary, switch back from `loky` to stdlib,
+    and shut down in `__del__` instead of `atexit`
+  * :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too
+  * OCR: allow running `-tr` without `-fl`, too
+  * :fire: writer: use `@type='heading'` instead of `'header'` for headings
+  * :fire: performance gains via refactoring (simplification, less code duplication, vectorization,
+    avoiding unused calculations, avoiding unnecessary 3-channel image operations)
+  * :fire: heuristic reading order detection: many improvements
+    - contour vs splitter box matching:
+      * contour must be contained in the box exactly, instead of heuristically
+      * as a fallback, match on centers: the center must be contained in the box
+    - original vs deskewed contour matching:
+      * same min-area filter on both sides
+      * similar area score in addition to center proximity
+      * avoid duplicate and missing mappings by allowing N:M
+        matches and splitting+joining where necessary
+  * CI: update+improve model caching
+
+
 ## [0.5.0] - 2025-09-26
 
 Fixed:
 
   * restoring the contour in the original image caused an error due to an empty tuple, #154
+  * removed NumPy warnings when calculating sigma and mean, #158
+  * fixed bug in `separate_lines.py`, #124
+  * Drop capitals are now handled separately from their corresponding textline
+  * Marginals are now divided into left and right. Their reading order is written first for left
+    marginals, then for right marginals, and within each side from top to bottom
+  * Added a new page extraction model.
+    Instead of bounding boxes, it outputs page contours in the XML file, improving results for skewed pages
+  * Improved reading order for cases where a textline is segmented into multiple smaller textlines
+
+Changed:
+
+  * CLIs: read only allowed filename suffixes (image or XML) with `--dir_in`
+  * CLIs: make all output options required, and `-i` / `-di` required but mutually exclusive
+  * ocr CLI: drop redundant `-brb` in favour of just `-dib`
+  * APIs: move all input/output path options from class (kwarg and attribute) to `run` kwarg
+  * layout textlines: polygonal also without `-cl`
 
 Added:
   * `eynollah machine-based-reading-order` CLI to run reading order detection, #175
   * `eynollah enhancement` CLI to run image enhancement, #175
   * Improved models for page extraction and reading order detection, #175
+  * For the lightweight version (layout and textline detection), thresholds are now assigned to the
+    artificial class. Users can apply these thresholds to improve detection of isolated textlines and
+    regions. To counteract the drawback of thresholding, the skeleton of the artificial class is used
+    to keep lines as thin as possible (resolved issues #163 and #161)
+  * Added and integrated trained CNN-RNN OCR models
+  * Added and integrated a trained TrOCR model
+  * Improved OCR detection to support vertical and curved textlines
+  * Introduced a new machine-based reading order model with rotation augmentation
+  * Optimized reading order speed by clustering text regions that belong to the same block,
+    maintaining top-to-bottom order
+  * Implemented text merging across textlines based on hyphenation when a line ends with a hyphen
+  * Integrated image enhancement as a separate use case
+  * Added reading order functionality on the layout level as a separate use case
+  * CNN-RNN OCR models provide confidence scores for predictions
+  * Added OCR visualization: predicted OCR can be overlaid on an image of the same size as the input
+  * Introduced a threshold value for CNN-RNN OCR models, allowing users to filter out low-confidence
+    textline predictions
+  * For OCR, users can specify a single model by name instead of always using the default model
+  * Under the OCR use case, if Ground Truth XMLs and images are available, textline image and
+    corresponding text extraction can now be performed
+
+Merged PRs:
+
+  * better machine based reading order + layout and textline + ocr by @vahidrezanezhad in https://github.com/qurator-spk/eynollah/pull/175
+  * CI: pypi by @kba in https://github.com/qurator-spk/eynollah/pull/154
+  * CI: Use most recent actions/setup-python@v5 by @kba in https://github.com/qurator-spk/eynollah/pull/157
+  * update docker by @bertsky in https://github.com/qurator-spk/eynollah/pull/159
+  * Ocrd fixes by @kba in https://github.com/qurator-spk/eynollah/pull/167
+  * Updating readme for eynollah use cases cli by @kba in https://github.com/qurator-spk/eynollah/pull/166
+  * OCR-D processor: expose reading_order_machine_based by @bertsky in https://github.com/qurator-spk/eynollah/pull/171
+  * prepare release v0.5.0: fix logging by @bertsky in https://github.com/qurator-spk/eynollah/pull/180
+  * mb_ro_on_layout: remove copy-pasta code not actually used by @kba in https://github.com/qurator-spk/eynollah/pull/181
+  * prepare release v0.5.0: improve CLI docstring, refactor I/O path options from class to run kwargs, increase test coverage by @bertsky in #182
+  * prepare release v0.5.0: fix for OCR doit subtest by @bertsky in https://github.com/qurator-spk/eynollah/pull/183
+  * Prepare release v0.5.0 by @kba in https://github.com/qurator-spk/eynollah/pull/178
+  * updating eynollah README, how to use it for use cases by @vahidrezanezhad in https://github.com/qurator-spk/eynollah/pull/156
+  * add feedback to command line interface by @michalbubula in https://github.com/qurator-spk/eynollah/pull/170
 
 ## [0.4.0] - 2025-04-07
 
@@ -195,6 +301,8 @@ Fixed:
 
 Initial release
 
+[0.6.0rc2]: ../../compare/v0.6.0rc2...v0.6.0rc1
+[0.6.0rc1]: ../../compare/v0.6.0rc1...v0.5.0
 [0.5.0]: ../../compare/v0.5.0...v0.4.0
 [0.4.0]: ../../compare/v0.4.0...v0.3.1
 [0.3.1]: ../../compare/v0.3.1...v0.3.0
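Two of the polygon entries above (the `join_polygons` fix and the switch from pixel dilation to `Polygon.buffer()`) boil down to keeping geometry valid and single-part. A minimal shapely sketch of both ideas, for illustration only (not the project's actual helper code):

```python
from shapely.geometry import Polygon
from shapely.ops import unary_union

# Widening via Polygon.buffer() keeps the outline one valid path;
# pixel dilation followed by re-contouring can yield self-intersections.
region = Polygon([(0, 0), (10, 0), (10, 5), (0, 5)])
widened = region.buffer(2.0, join_style=2)   # join_style=2: mitred corners
print(widened.is_valid, widened.geom_type)   # True Polygon

# Joining nearby regions: buffer outward, union, shrink back (a closing),
# so the caller always gets one Polygon rather than a MultiPolygon.
other = Polygon([(14, 0), (24, 0), (24, 5), (14, 5)])
joined = unary_union([region.buffer(3.0), other.buffer(3.0)]).buffer(-3.0)
print(joined.geom_type)                      # Polygon
```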
diff --git a/Dockerfile b/Dockerfile
index 4ba498b..a15776e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,6 +40,8 @@ RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename))/ocrd-all-tool.json
 RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json
 # install everything and reduce image size
 RUN make install EXTRAS=OCR && rm -rf /build/eynollah
+# fixup for broken cuDNN installation (Torch pulls in 8.5.0, which is incompatible with TensorFlow)
+RUN pip install nvidia-cudnn-cu11==8.6.0.163
 # smoke test
 RUN eynollah --help
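The pinned cuDNN wheel resolves the Torch/TensorFlow conflict described in the comment above. A hypothetical post-build sanity check (not part of the image itself) would be to confirm both frameworks initialize against the one pinned cuDNN build:

```python
# Hypothetical check, run inside the container: both frameworks should
# see the GPU and report the single pinned cuDNN build (8.6.x above).
import tensorflow as tf
import torch

print("TF GPUs:", tf.config.list_physical_devices("GPU"))
print("TF built with cuDNN:", tf.sysconfig.get_build_info().get("cudnn_version"))
print("Torch CUDA available:", torch.cuda.is_available())
print("Torch cuDNN version:", torch.backends.cudnn.version())
```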
diff --git a/Makefile b/Makefile
index a920615..29dd877 100644
--- a/Makefile
+++ b/Makefile
@@ -13,12 +13,18 @@ DOCKER ?= docker
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
 SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
+SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
+SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)
 
 BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
+BIN_MODELFILE = $(notdir $(BIN_MODEL))
+BIN_MODELNAME := default-2021-03-09
 
-OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1
+OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
+OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
+OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
 
-PYTEST_ARGS ?= -vv
+PYTEST_ARGS ?= -vv --isolate
 
 # BEGIN-EVAL makefile-parser --make-help Makefile
 
@@ -31,7 +37,8 @@ help:
 	@echo "    install          Install package with pip"
 	@echo "    install-dev      Install editable with pip"
 	@echo "    deps-test        Install test dependencies with pip"
-	@echo "    models           Download and extract models to $(CURDIR)/models_layout_v0_5_0"
+	@echo "    models           Download and extract models to $(CURDIR):"
+	@echo "                     $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)"
 	@echo "    smoke-test       Run simple CLI check"
 	@echo "    ocrd-test        Run OCR-D CLI check"
 	@echo "    test             Run unit tests"
@@ -42,33 +49,32 @@ help:
 	@echo "    PYTEST_ARGS      pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests.) [$(PYTEST_ARGS)]"
 	@echo "    SEG_MODEL        URL of 'models' archive to download for segmentation 'test' [$(SEG_MODEL)]"
 	@echo "    BIN_MODEL        URL of 'models' archive to download for binarization 'test' [$(BIN_MODEL)]"
+	@echo "    OCR_MODEL        URL of 'models' archive to download for OCR 'test' [$(OCR_MODEL)]"
 	@echo ""
 
 # END-EVAL
 
 # Download and extract models to $(PWD)/models_layout_v0_5_0
-models: models_layout_v0_5_0 models_ocr_v0_5_0 default-2021-03-09
+models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
 
-models_layout_v0_5_0: models_layout_v0_5_0.tar.gz
-	tar zxf models_layout_v0_5_0.tar.gz
+# do not download these files if we already have the directories
+.INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE)
 
-models_layout_v0_5_0.tar.gz:
+$(BIN_MODELFILE):
+	wget -O $@ $(BIN_MODEL)
+$(SEG_MODELFILE):
 	wget -O $@ $(SEG_MODEL)
-
-models_ocr_v0_5_0: models_ocr_v0_5_0.tar.gz
-	tar zxf models_ocr_v0_5_0.tar.gz
-
-models_ocr_v0_5_0.tar.gz:
+$(OCR_MODELFILE):
 	wget -O $@ $(OCR_MODEL)
 
-default-2021-03-09: $(notdir $(BIN_MODEL))
-	unzip $(notdir $(BIN_MODEL))
+$(BIN_MODELNAME): $(BIN_MODELFILE)
 	mkdir $@
-	mv $(basename $(notdir $(BIN_MODEL))) $@
-
-$(notdir $(BIN_MODEL)):
-	wget $(BIN_MODEL)
+	unzip -d $@ $<
+$(SEG_MODELNAME): $(SEG_MODELFILE)
+	tar zxf $<
+$(OCR_MODELNAME): $(OCR_MODELFILE)
+	tar zxf $<
 
 build:
 	$(PIP) install build
@@ -82,28 +88,34 @@ install:
 install-dev:
 	$(PIP) install -e .$(and $(EXTRAS),[$(EXTRAS)])
 
-deps-test: models_layout_v0_5_0
+ifeq (OCR,$(findstring OCR, $(EXTRAS)))
+deps-test: $(OCR_MODELNAME)
+endif
+deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
 	$(PIP) install -r requirements-test.txt
+ifeq (OCR,$(findstring OCR, $(EXTRAS)))
+	ln -rs $(OCR_MODELNAME)/* $(SEG_MODELNAME)/
+endif
 
 smoke-test: TMPDIR != mktemp -d
 smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
 	# layout analysis:
-	eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0
+	eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
 	fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(
[…]
diff --git a/requirements.txt b/requirements.txt
@@
 scikit-learn >= 0.23.2
 tensorflow < 2.13
 numba <= 0.58.1
 scikit-image
-loky
 biopython
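Dropping `loky` from the requirements ties in with the changelog note above: deskewing parallelism goes back to the stdlib `ProcessPoolExecutor`, passing large masks through shared memory instead of pickling them for every task. A rough sketch of that pattern, assuming a `share_ndarray` context manager similar in spirit to the one imported from `.utils.shm` further below (all names here are illustrative, not the project's actual API):

```python
from concurrent.futures import ProcessPoolExecutor
from contextlib import contextmanager
from multiprocessing import shared_memory
import numpy as np

@contextmanager
def share_ndarray(arr: np.ndarray):
    # Copy the array into shared memory once, so workers attach by name
    # instead of receiving a pickled copy per task.
    shm = shared_memory.SharedMemory(create=True, size=arr.nbytes)
    try:
        shared = np.ndarray(arr.shape, dtype=arr.dtype, buffer=shm.buf)
        shared[:] = arr[:]
        yield (shm.name, arr.shape, arr.dtype)
    finally:
        shm.close()
        shm.unlink()

def worker(spec, box):
    name, shape, dtype = spec
    shm = shared_memory.SharedMemory(name=name)
    try:
        mask = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
        y, x, h, w = box
        return int(mask[y:y + h, x:x + w].sum())
    finally:
        shm.close()

if __name__ == "__main__":
    mask = np.ones((100, 100), dtype=np.uint8)
    boxes = [(0, 0, 10, 10), (50, 50, 20, 20)]
    with share_ndarray(mask) as spec:
        with ProcessPoolExecutor(max_workers=2) as ex:
            # exhaust the iterator before the shared memory is released
            print(list(ex.map(worker, [spec] * len(boxes), boxes)))
```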
diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py
index 93bb676..c9bad52 100644
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@@ -202,6 +202,13 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
     type=click.Path(exists=True, file_okay=False),
     required=True,
 )
+@click.option(
+    "--model_version",
+    "-mv",
+    help="override default versions of model categories",
+    type=(str, str),
+    multiple=True,
+)
 @click.option(
     "--save_images",
     "-si",
@@ -373,7 +380,7 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
     help="Setup a basic console logger",
 )
-def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
+def layout(image, out, overwrite, dir_in, model, model_version, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, 
input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging): if setup_logging: console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) @@ -404,6 +411,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." eynollah = Eynollah( model, + model_versions=model_version, extract_only_images=extract_only_images, enable_plotting=enable_plotting, allow_enhancement=allow_enhancement, diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 63f7005..13acba6 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -19,7 +19,7 @@ import math import os import sys import time -from typing import Optional +from typing import Dict, List, Optional, Tuple import atexit import warnings from functools import partial @@ -28,10 +28,12 @@ from multiprocessing import cpu_count import gc import copy import json -from loky import ProcessPoolExecutor + +from concurrent.futures import ProcessPoolExecutor import xml.etree.ElementTree as ET import cv2 import numpy as np +import shapely.affinity from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d from numba import cuda @@ -69,6 +71,7 @@ from .utils.contour import ( filter_contours_area_of_image, filter_contours_area_of_image_tables, find_contours_mean_y_diff, + find_center_of_contours, find_new_features_of_contours, find_features_of_contours, get_text_region_boxes_by_given_contours, @@ -76,9 +79,14 @@ from .utils.contour import ( get_textregion_contours_in_org_image_light, return_contours_of_image, return_contours_of_interested_region, - return_contours_of_interested_region_by_min_size, return_contours_of_interested_textline, return_parent_contours, + dilate_textregion_contours, + dilate_textline_contours, + polygon2contour, + contour2polygon, + join_polygons, + make_intersection, ) from .utils.rotate import ( rotate_image, @@ -87,6 +95,7 @@ from .utils.rotate import ( rotation_image_new ) from .utils.utils_ocr import ( + return_start_and_end_of_common_text_of_textline_ocr_without_common_section, return_textline_contour_with_added_box_coordinate, preprocess_and_resize_image_for_ocrcnn_model, return_textlines_split_if_needed, @@ -99,10 +108,8 @@ from .utils.utils_ocr import ( get_contours_and_bounding_boxes ) from .utils.separate_lines import ( - textline_contours_postprocessing, separate_lines_new2, return_deskew_slop, - return_deskew_slop_old_mp, do_work_of_slopes_new, do_work_of_slopes_new_curved, do_work_of_slopes_new_light, @@ -113,10 +120,13 @@ from .utils.drop_capitals import ( ) from .utils.marginals import get_marginals from .utils.resize import resize_image +from .utils.shm import share_ndarray from .utils import ( is_image_filename, boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, + box2rect, + box2slice, find_num_col, otsu_copy_binary, put_drop_out_from_only_drop_model, @@ -170,7 +180,6 @@ class Patches(layers.Layer): }) return config - class PatchEncoder(layers.Layer): def __init__(self, **kwargs): super(PatchEncoder, self).__init__() @@ -198,6 +207,7 @@ class Eynollah: def __init__( self, dir_models : str, + model_versions: List[Tuple[str, str]] = [], extract_only_images 
: bool =False, enable_plotting : bool = False, allow_enhancement : bool = False, @@ -244,6 +254,10 @@ class Eynollah: self.skip_layout_and_reading_order = skip_layout_and_reading_order self.ocr = do_ocr self.tr = transformer_ocr + if not batch_size_ocr: + self.b_s_ocr = 8 + else: + self.b_s_ocr = int(batch_size_ocr) if num_col_upper: self.num_col_upper = int(num_col_upper) else: @@ -252,6 +266,9 @@ class Eynollah: self.num_col_lower = int(num_col_lower) else: self.num_col_lower = num_col_lower + + # for parallelization of CPU-intensive tasks: + self.executor = ProcessPoolExecutor(max_workers=cpu_count()) if threshold_art_class_layout: self.threshold_art_class_layout = float(threshold_art_class_layout) @@ -262,69 +279,9 @@ class Eynollah: self.threshold_art_class_textline = float(threshold_art_class_textline) else: self.threshold_art_class_textline = 0.1 - - self.dir_models = dir_models - self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" - self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" - self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" - self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" - self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - #"/modelens_full_lay_1_3_031124" - #"/modelens_full_lay_13__3_19_241024" - #"/model_full_lay_13_241024" - #"/modelens_full_lay_13_17_231024" - #"/modelens_full_lay_1_2_221024" - #"/eynollah-full-regions-1column_20210425" - self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1__4_3_091124" - #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" - self.model_page_dir = dir_models + "/model_eynollah_page_extraction_20250915" - self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" - self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" - self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_mb_ro_aug_ens_11"#"/model_step_3200000_mb_ro"#"/model_ens_reading_order_machine_based"#"/model_mb_ro_aug_ens_8"#"/model_ens_reading_order_machine_based" - #"/modelens_12sp_elay_0_3_4__3_6_n" - #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" - #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" - #"/modelens_1_2_4_5_early_lay_1_2_spaltige" - #"/model_3_eraly_layout_no_patches_1_2_spaltige" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024" - ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - #"/modelens_full_lay_1_3_031124" - #"/modelens_full_lay_13__3_19_241024" - #"/model_full_lay_13_241024" - #"/modelens_full_lay_13_17_231024" - #"/modelens_full_lay_1_2_221024" - #"/modelens_full_layout_24_till_28" - #"/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124" - if self.textline_light: - #"/modelens_textline_1_4_16092024" - #"/model_textline_ens_3_4_5_6_artificial" - #"/modelens_textline_1_3_4_20240915" - #"/model_textline_ens_3_4_5_6_artificial" - #"/modelens_textline_9_12_13_14_15" - #"/eynollah-textline_light_20210425" - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024" - else: - #"/eynollah-textline_20210425" - self.model_textline_dir = dir_models + 
"/modelens_textline_0_1__2_4_16092024" - if self.ocr and self.tr: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_trocr_20250919" - elif self.ocr and not self.tr: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250904" - if self.tables: - if self.light_version: - self.model_table_dir = dir_models + "/modelens_table_0t4_201124" - else: - self.model_table_dir = dir_models + "/eynollah-tables_20210319" - - + t_start = time.time() - # for parallelization of CPU-intensive tasks: - self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) - atexit.register(self.executor.shutdown) - # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) @@ -340,61 +297,157 @@ class Eynollah: self.logger.warning("no GPU device available") self.logger.info("Loading models...") - - self.model_page = self.our_load_model(self.model_page_dir) - self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) - self.model_bin = self.our_load_model(self.model_dir_of_binarization) - if self.extract_only_images: - self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction) - else: - self.model_textline = self.our_load_model(self.model_textline_dir) + self.setup_models(dir_models, model_versions) + self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)") + + @staticmethod + def our_load_model(model_file, basedir=""): + if basedir: + model_file = os.path.join(basedir, model_file) + if model_file.endswith('.h5') and Path(model_file[:-3]).exists(): + # prefer SavedModel over HDF5 format if it exists + model_file = model_file[:-3] + try: + model = load_model(model_file, compile=False) + except: + model = load_model(model_file, compile=False, custom_objects={ + "PatchEncoder": PatchEncoder, "Patches": Patches}) + return model + + def setup_models(self, basedir: Path, model_versions: List[Tuple[str, str]] = []): + self.model_versions = { + "enhancement": "eynollah-enhancement_20210425", + "binarization": "eynollah-binarization_20210425", + "col_classifier": "eynollah-column-classifier_20210425", + "page": "model_eynollah_page_extraction_20250915", + #?: "eynollah-main-regions-aug-scaling_20210425", + "region": ( # early layout + "eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" if self.extract_only_images else + "eynollah-main-regions_20220314" if self.light_version else + "eynollah-main-regions-ensembled_20210425"), + "region_p2": ( # early layout, non-light, 2nd part + "eynollah-main-regions-aug-rotation_20210425"), + "region_1_2": ( # early layout, light, 1-or-2-column + #"modelens_12sp_elay_0_3_4__3_6_n" + #"modelens_earlylayout_12spaltige_2_3_5_6_7_8" + #"modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" + #"modelens_1_2_4_5_early_lay_1_2_spaltige" + #"model_3_eraly_layout_no_patches_1_2_spaltige" + "modelens_e_l_all_sp_0_1_2_3_4_171024"), + "region_fl_np": ( # full layout / no patches + #"modelens_full_lay_1_3_031124" + #"modelens_full_lay_13__3_19_241024" + #"model_full_lay_13_241024" + #"modelens_full_lay_13_17_231024" + #"modelens_full_lay_1_2_221024" + #"eynollah-full-regions-1column_20210425" + "modelens_full_lay_1__4_3_091124"), + "region_fl": ( # full layout / with patches + #"eynollah-full-regions-3+column_20210425" + ##"model_2_full_layout_new_trans" + #"modelens_full_lay_1_3_031124" + 
#"modelens_full_lay_13__3_19_241024" + #"model_full_lay_13_241024" + #"modelens_full_lay_13_17_231024" + #"modelens_full_lay_1_2_221024" + #"modelens_full_layout_24_till_28" + #"model_2_full_layout_new_trans" + "modelens_full_lay_1__4_3_091124"), + "reading_order": ( + #"model_mb_ro_aug_ens_11" + #"model_step_3200000_mb_ro" + #"model_ens_reading_order_machine_based" + #"model_mb_ro_aug_ens_8" + #"model_ens_reading_order_machine_based" + "model_eynollah_reading_order_20250824"), + "textline": ( + #"modelens_textline_1_4_16092024" + #"model_textline_ens_3_4_5_6_artificial" + #"modelens_textline_1_3_4_20240915" + #"model_textline_ens_3_4_5_6_artificial" + #"modelens_textline_9_12_13_14_15" + #"eynollah-textline_light_20210425" + "modelens_textline_0_1__2_4_16092024" if self.textline_light else + #"eynollah-textline_20210425" + "modelens_textline_0_1__2_4_16092024"), + "table": ( + None if not self.tables else + "modelens_table_0t4_201124" if self.light_version else + "eynollah-tables_20210319"), + "ocr": ( + None if not self.ocr else + "model_eynollah_ocr_trocr_20250919" if self.tr else + "model_eynollah_ocr_cnnrnn_20250930") + } + # override defaults from CLI + for key, val in model_versions: + assert key in self.model_versions, "unknown model category '%s'" % key + self.logger.warning("overriding default model %s version %s to %s", key, self.model_versions[key], val) + self.model_versions[key] = val + # load models, depending on modes + # (note: loading too many models can cause OOM on GPU/CUDA, + # thus, we try set up the minimal configuration for the current mode) + loadable = [ + "col_classifier", + "binarization", + "page", + "region" + ] + if not self.extract_only_images: + loadable.append("textline") if self.light_version: - self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) - self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) + loadable.append("region_1_2") else: - self.model_region = self.our_load_model(self.model_region_dir_p_ens) - self.model_region_p2 = self.our_load_model(self.model_region_dir_p2) - self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) - ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) - self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) - self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + loadable.append("region_p2") + # if self.allow_enhancement:? 
+ loadable.append("enhancement") + if self.full_layout: + loadable.append("region_fl_np") + #loadable.append("region_fl") if self.reading_order_machine_based: - self.model_reading_order = self.our_load_model(self.model_reading_order_dir) - if self.ocr and self.tr: - self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") - self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") - elif self.ocr and not self.tr: - model_ocr = load_model(self.model_ocr_dir , compile=False) - - self.prediction_model = tf.keras.models.Model( - model_ocr.get_layer(name = "image").input, - model_ocr.get_layer(name = "dense2").output) - if not batch_size_ocr: - self.b_s_ocr = 8 + loadable.append("reading_order") + if self.tables: + loadable.append("table") + + self.models = {name: self.our_load_model(self.model_versions[name], basedir) + for name in loadable + } + + if self.ocr: + ocr_model_dir = os.path.join(basedir, self.model_versions["ocr"]) + if self.tr: + self.models["ocr"] = VisionEncoderDecoderModel.from_pretrained(ocr_model_dir) + if torch.cuda.is_available(): + self.logger.info("Using GPU acceleration") + self.device = torch.device("cuda:0") else: - self.b_s_ocr = int(batch_size_ocr) - + self.logger.info("Using CPU processing") + self.device = torch.device("cpu") + #self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + else: + ocr_model = load_model(ocr_model_dir, compile=False) + self.models["ocr"] = tf.keras.models.Model( + ocr_model.get_layer(name = "image").input, + ocr_model.get_layer(name = "dense2").output) - with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: + with open(os.path.join(ocr_model_dir, "characters_org.txt"), "r") as config_file: characters = json.load(config_file) - - - AUTOTUNE = tf.data.AUTOTUNE - # Mapping characters to integers. char_to_num = StringLookup(vocabulary=list(characters), mask_token=None) - # Mapping integers back to original characters. 
self.num_to_char = StringLookup( vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True ) - - if self.tables: - self.model_table = self.our_load_model(self.model_table_dir) - - self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)") + + def __del__(self): + if hasattr(self, 'executor') and getattr(self, 'executor'): + self.executor.shutdown() + self.executor = None + if hasattr(self, 'models') and getattr(self, 'models'): + for model_name in list(self.models): + if self.models[model_name]: + del self.models[model_name] def cache_images(self, image_filename=None, image_pil=None, dpi=None): ret = {} @@ -441,8 +494,8 @@ class Eynollah: def predict_enhancement(self, img): self.logger.debug("enter predict_enhancement") - img_height_model = self.model_enhancement.layers[-1].output_shape[1] - img_width_model = self.model_enhancement.layers[-1].output_shape[2] + img_height_model = self.models["enhancement"].layers[-1].output_shape[1] + img_width_model = self.models["enhancement"].layers[-1].output_shape[2] if img.shape[0] < img_height_model: img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST) if img.shape[1] < img_width_model: @@ -483,7 +536,7 @@ class Eynollah: index_y_d = img_h - img_height_model img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = self.model_enhancement.predict(img_patch, verbose=0) + label_p_pred = self.models["enhancement"].predict(img_patch, verbose=0) seg = label_p_pred[0, :, :, :] * 255 if i == 0 and j == 0: @@ -658,7 +711,7 @@ class Eynollah: img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - label_p_pred = self.model_classifier.predict(img_in, verbose=0) + label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 self.logger.info("Found %s columns (%s)", num_col, label_p_pred) @@ -676,8 +729,8 @@ class Eynollah: self.logger.info("Detected %s DPI", dpi) if self.input_binary: img = self.imread() - prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) - prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = self.do_prediction(True, img, self.models["binarization"], n_batch_inference=5) + prediction_bin = 255 * (prediction_bin[:,:,0] == 0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8) img= np.copy(prediction_bin) img_bin = prediction_bin @@ -716,7 +769,7 @@ class Eynollah: img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - label_p_pred = self.model_classifier.predict(img_in, verbose=0) + label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): @@ -737,7 +790,7 @@ class Eynollah: img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - label_p_pred = self.model_classifier.predict(img_in, verbose=0) + label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 if num_col > self.num_col_upper: @@ -836,9 +889,11 @@ class Eynollah: self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False, thresholding_for_fl_light_version=False, threshold_art_class_textline=0.1): + thresholding_for_artificial_class_in_light_version=False, + 
thresholding_for_fl_light_version=False, + threshold_art_class_textline=0.1): - self.logger.debug("enter do_prediction") + self.logger.debug("enter do_prediction (patches=%d)", patches) img_height_model = model.layers[-1].output_shape[1] img_width_model = model.layers[-1].output_shape[2] @@ -889,10 +944,8 @@ class Eynollah: img_w = img.shape[1] prediction_true = np.zeros((img_h, img_w, 3)) mask_true = np.zeros((img_h, img_w)) - nxf = img_w / float(width_mid) - nyf = img_h / float(height_mid) - nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) - nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) + nxf = math.ceil(img_w / float(width_mid)) + nyf = math.ceil(img_h / float(height_mid)) list_i_s = [] list_j_s = [] @@ -905,18 +958,10 @@ class Eynollah: img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - else: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - else: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model if index_x_u > img_w: index_x_u = img_w index_x_d = img_w - img_width_model @@ -1250,7 +1295,9 @@ class Eynollah: self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False, threshold_art_class_textline=0.1, threshold_art_class_layout=0.1): + thresholding_for_artificial_class_in_light_version=False, + threshold_art_class_textline=0.1, + threshold_art_class_layout=0.1): self.logger.debug("enter do_prediction_new_concept") img_height_model = model.layers[-1].output_shape[1] @@ -1380,7 +1427,8 @@ class Eynollah: for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): seg_in_art = seg_art[indexer_inside_batch] index_y_u_in = list_y_u[indexer_inside_batch] @@ -1400,7 +1448,8 @@ class Eynollah: label_p_pred[0, 0:-margin or None, 0:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + 0:index_x_u_in - margin, 1] = \ seg_in_art[0:-margin or None, @@ -1417,7 +1466,8 @@ class Eynollah: label_p_pred[0, margin:, margin:, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + margin:index_x_u_in - 0, 1] = \ seg_in_art[margin:, @@ -1435,7 +1485,8 @@ class Eynollah: 0:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): 
prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + 0:index_x_u_in - margin, 1] = \ seg_in_art[margin:, @@ -1452,7 +1503,8 @@ class Eynollah: label_p_pred[0, 0:-margin or None, margin:, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - 0, 1] = \ seg_in_art[0:-margin or None, @@ -1469,7 +1521,8 @@ class Eynollah: label_p_pred[0, margin:-margin or None, 0:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + 0:index_x_u_in - margin, 1] = \ seg_in_art[margin:-margin or None, @@ -1485,7 +1538,8 @@ class Eynollah: label_p_pred[0, margin:-margin or None, margin:, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - 0, 1] = \ seg_in_art[margin:-margin or None, @@ -1501,7 +1555,8 @@ class Eynollah: label_p_pred[0, 0:-margin or None, margin:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - margin, 1] = \ seg_in_art[0:-margin or None, @@ -1517,7 +1572,8 @@ class Eynollah: label_p_pred[0, margin:, margin:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + margin:index_x_u_in - margin, 1] = \ seg_in_art[margin:, @@ -1533,7 +1589,8 @@ class Eynollah: label_p_pred[0, margin:-margin or None, margin:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - margin, 1] = \ seg_in_art[margin:-margin or None, @@ -1585,7 +1642,7 @@ class Eynollah: cont_page = [] if not self.ignore_page_extraction: img = np.copy(self.image)#cv2.GaussianBlur(self.image, (5, 5), 0) - img_page_prediction = self.do_prediction(False, img, self.model_page) + img_page_prediction = self.do_prediction(False, img, self.models["page"]) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) ##thresh = cv2.dilate(thresh, KERNEL, iterations=3) @@ -1633,7 +1690,7 @@ class Eynollah: else: img = self.imread() img = cv2.GaussianBlur(img, (5, 5), 0) - img_page_prediction = self.do_prediction(False, img, self.model_page) + img_page_prediction = self.do_prediction(False, img, self.models["page"]) imgray 
= cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) @@ -1659,7 +1716,7 @@ class Eynollah: self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] img_width_h = img.shape[1] - model_region = self.model_region_fl if patches else self.model_region_fl_np + model_region = self.models["region_fl"] if patches else self.models["region_fl_np"] if self.light_version: thresholding_for_fl_light_version = True @@ -1682,7 +1739,10 @@ class Eynollah: else: img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8) - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3, thresholding_for_fl_light_version=thresholding_for_fl_light_version) + prediction_regions = self.do_prediction(patches, img, model_region, + marginal_of_patch_percent=0.1, + n_batch_inference=3, + thresholding_for_fl_light_version=thresholding_for_fl_light_version) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions @@ -1691,7 +1751,7 @@ class Eynollah: self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] img_width_h = img.shape[1] - model_region = self.model_region_fl if patches else self.model_region_fl_np + model_region = self.models["region_fl"] if patches else self.models["region_fl_np"] if not patches: img = otsu_copy_binary(img) @@ -1810,18 +1870,13 @@ class Eynollah: return sorted_textlines - - def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new_light2(self, contours_par, textline_mask_tot, boxes, slope_deskew): polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) - M_main_tot = [cv2.moments(polygons_of_textlines[j]) - for j in range(len(polygons_of_textlines))] + cx_main_tot, cy_main_tot = find_center_of_contours(polygons_of_textlines) + w_h_textlines = [cv2.boundingRect(polygon)[2:] for polygon in polygons_of_textlines] - w_h_textlines = [cv2.boundingRect(polygons_of_textlines[i])[2:] for i in range(len(polygons_of_textlines))] - cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - - args_textlines = np.array(range(len(polygons_of_textlines))) + args_textlines = np.arange(len(polygons_of_textlines)) all_found_textline_polygons = [] slopes = [] all_box_coord =[] @@ -1836,55 +1891,65 @@ class Eynollah: cy_textline_in = [cy_main_tot[ind] for ind in indexes_in] w_h_textlines_in = [w_h_textlines[ind][0] / float(w_h_textlines[ind][1]) for ind in indexes_in] - textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins, cx_textline_in, cy_textline_in, w_h_textlines_in) + textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins, + cx_textline_in, + cy_textline_in, + w_h_textlines_in) all_found_textline_polygons.append(textlines_ins)#[::-1]) slopes.append(slope_deskew) - _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) + crop_coor = box2rect(boxes[index]) all_box_coord.append(crop_coor) - return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes + return (all_found_textline_polygons, + all_box_coord, + slopes) - def 
get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): - return [], [], [], [], [], [], [] + return [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") - results = self.executor.map(partial(do_work_of_slopes_new_light, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew,textline_light=self.textline_light, - logger=self.logger,), - boxes, contours, contours_par, range(len(contours_par))) - #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: + results = self.executor.map(partial(do_work_of_slopes_new_light, + textline_mask_tot_ea=textline_mask_tot_shared, + slope_deskew=slope_deskew, + textline_light=self.textline_light, + logger=self.logger,), + boxes, contours, contours_par) + results = list(results) # exhaust prior to release + #textline_polygons, box_coord, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) - def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): - return [], [], [], [], [], [], [] + return [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") - results = self.executor.map(partial(do_work_of_slopes_new, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - logger=self.logger, - plotter=self.plotter,), - boxes, contours, contours_par, range(len(contours_par))) - #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: + results = self.executor.map(partial(do_work_of_slopes_new, + textline_mask_tot_ea=textline_mask_tot_shared, + slope_deskew=slope_deskew, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + logger=self.logger, + plotter=self.plotter,), + boxes, contours, contours_par) + results = list(results) # exhaust prior to release + #textline_polygons, box_coord, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) - def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): - if not len(contours): - return [], [], [], [], [], [], [] + def get_slopes_and_deskew_new_curved(self, contours_par, textline_mask_tot, boxes, + mask_texts_only, num_col, scale_par, slope_deskew): + if not len(contours_par): + return [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") - results = self.executor.map(partial(do_work_of_slopes_new_curved, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - mask_texts_only=mask_texts_only, + with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: + with share_ndarray(mask_texts_only) as mask_texts_only_shared: + results = self.executor.map(partial(do_work_of_slopes_new_curved, + textline_mask_tot_ea=textline_mask_tot_shared, + mask_texts_only=mask_texts_only_shared, 
num_col=num_col, scale_par=scale_par, slope_deskew=slope_deskew, @@ -1892,8 +1957,9 @@ class Eynollah: KERNEL=KERNEL, logger=self.logger, plotter=self.plotter,), - boxes, contours, contours_par, range(len(contours_par))) - #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + boxes, contours_par) + results = list(results) # exhaust prior to release + #textline_polygons, box_coord, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_curved") return tuple(zip(*results)) @@ -1906,13 +1972,14 @@ class Eynollah: img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - prediction_textline = self.do_prediction( - use_patches, img, self.model_textline, - marginal_of_patch_percent=0.15, n_batch_inference=3, - thresholding_for_artificial_class_in_light_version=self.textline_light, threshold_art_class_textline=self.threshold_art_class_textline) + prediction_textline = self.do_prediction(use_patches, img, self.models["textline"], + marginal_of_patch_percent=0.15, + n_batch_inference=3, + thresholding_for_artificial_class_in_light_version=self.textline_light, + threshold_art_class_textline=self.threshold_art_class_textline) #if not self.textline_light: #if num_col_classifier==1: - #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) + #prediction_textline_nopatch = self.do_prediction(False, img, self.models["textline"]) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) @@ -1983,7 +2050,7 @@ class Eynollah: #cv2.imwrite('prediction_textline2.png', prediction_textline[:,:,0]) - prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) + prediction_textline_longshot = self.do_prediction(False, img, self.models["textline"]) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) @@ -1994,43 +2061,6 @@ class Eynollah: (prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8)) - def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): - self.logger.debug('enter do_work_of_slopes') - slope_biggest = 0 - slopes_sub = [] - boxes_sub_new = [] - poly_sub = [] - for mv in range(len(boxes_per_process)): - crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) - crop_img = crop_img[:, :, 0] - crop_img = cv2.erode(crop_img, KERNEL, iterations=2) - try: - textline_con, hierarchy = return_contours_of_image(crop_img) - textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, max_area=1, min_area=0.0008) - y_diff_mean = find_contours_mean_y_diff(textline_con_fil) - sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) - crop_img[crop_img > 0] = 1 - slope_corresponding_textregion = return_deskew_slop_old_mp(crop_img, sigma_des, - logger=self.logger, plotter=self.plotter) - except Exception as why: - self.logger.error(why) - slope_corresponding_textregion = MAX_SLOPE - - if slope_corresponding_textregion == MAX_SLOPE: - slope_corresponding_textregion = slope_biggest - slopes_sub.append(slope_corresponding_textregion) - - cnt_clean_rot = textline_contours_postprocessing( - crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) - - poly_sub.append(cnt_clean_rot) - boxes_sub_new.append(boxes_per_process[mv]) - - 
q.put(slopes_sub) - poly.put(poly_sub) - box_sub.put(boxes_sub_new) - self.logger.debug('exit do_work_of_slopes') - def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_extract_images_only") erosion_hurts = False @@ -2053,7 +2083,7 @@ class Eynollah: img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) - prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.model_region) + prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.models["region"]) prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) image_page, page_coord, cont_page = self.extract_page() @@ -2065,9 +2095,9 @@ class Eynollah: mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_images_only=(prediction_regions_org[:,:] ==2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -2106,7 +2136,7 @@ class Eynollah: ##polygons_of_images_fin.append(ploy_img_ind) box = cv2.boundingRect(ploy_img_ind) - _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + page_coord_img = box2rect(box) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -2120,7 +2150,7 @@ class Eynollah: if h < 150 or w < 150: pass else: - _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + page_coord_img = box2rect(box) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -2131,9 +2161,15 @@ class Eynollah: [page_coord_img[2], page_coord_img[1]]])) self.logger.debug("exit get_regions_extract_images_only") - return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page + return (text_regions_p_true, + erosion_hurts, + polygons_seplines, + polygons_of_images_fin, + image_page, + page_coord, + cont_page) - def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): + def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False @@ -2163,7 +2199,7 @@ class Eynollah: #if self.input_binary: #img_bin = np.copy(img_resized) ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): - ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + ###prediction_bin = self.do_prediction(True, img_resized, self.models["binarization"], n_batch_inference=5) ####print("inside bin ", time.time()-t_bin) ###prediction_bin=prediction_bin[:,:,0] @@ -2178,7 +2214,7 @@ class Eynollah: ###else: ###img_bin = np.copy(img_resized) if (self.ocr and self.tr) and not self.input_binary: - prediction_bin = 
self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, self.models["binarization"], n_batch_inference=5) prediction_bin = 255 * (prediction_bin[:,:,0] == 0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) prediction_bin = prediction_bin.astype(np.uint16) @@ -2189,7 +2225,8 @@ class Eynollah: #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) - self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized))) + self.logger.debug("detecting textlines on %s with %d colors", + str(img_resized.shape), len(np.unique(img_resized))) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) @@ -2198,102 +2235,109 @@ class Eynollah: #plt.imshwo(self.image_page_org_size) #plt.show() - if not skip_layout_and_reading_order: - #print("inside 2 ", time.time()-t_in) - if num_col_classifier == 1 or num_col_classifier == 2: - if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - self.logger.debug("resized to %dx%d for %d cols", - img_resized.shape[1], img_resized.shape[0], num_col_classifier) - prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( - True, img_resized, self.model_region_1_2, n_batch_inference=1, - thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout) - else: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1])) - prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept( - False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, - thresholding_for_artificial_class_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout) - ys = slice(*self.page_coord[0:2]) - xs = slice(*self.page_coord[2:4]) - prediction_regions_org[ys, xs] = prediction_regions_page - confidence_matrix[ys, xs] = confidence_matrix_page - - else: - new_h = (900+ (num_col_classifier-3)*100) - img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) - self.logger.debug("resized to %dx%d (new_h=%d) for %d cols", - img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) - prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( - True, img_resized, self.model_region_1_2, n_batch_inference=2, - thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout) - ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - #print("inside 3 ", time.time()-t_in) - #plt.imshow(prediction_regions_org[:,:,0]) - #plt.show() - - prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - confidence_matrix = resize_image(confidence_matrix, img_height_h, img_width_h ) - img_bin = resize_image(img_bin, img_height_h, img_width_h ) - prediction_regions_org=prediction_regions_org[:,:,0] - - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - mask_texts_only = mask_texts_only.astype('uint8') - - ##if num_col_classifier == 1 or num_col_classifier == 2: - ###mask_texts_only = 
cv2.erode(mask_texts_only, KERNEL, iterations=1) - ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) - - mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) - mask_images_only=(prediction_regions_org[:,:] ==2)*1 - - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1)) - - #plt.imshow(test_khat[:,:]) - #plt.show() - #for jv in range(1): - #print(jv, hir_lines_xml[0][232][3]) - #test_khat = np.zeros(prediction_regions_org.shape) - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) - #plt.imshow(test_khat[:,:]) - #plt.show() - - polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - - test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - #plt.imshow(test_khat[:,:]) - #plt.show() - #sys.exit() - - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - - text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3)) - - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - - #plt.imshow(textline_mask_tot_ea) - #plt.show() - - textline_mask_tot_ea[(text_regions_p_true==0) | (text_regions_p_true==4) ] = 0 - - #plt.imshow(textline_mask_tot_ea) - #plt.show() - #print("inside 4 ", time.time()-t_in) - self.logger.debug("exit get_regions_light_v") - return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix - else: + if self.skip_layout_and_reading_order: img_bin = resize_image(img_bin,img_height_h, img_width_h ) self.logger.debug("exit get_regions_light_v") - return None, erosion_hurts, None, textline_mask_tot_ea, img_bin, None + return None, erosion_hurts, None, None, textline_mask_tot_ea, img_bin, None + + #print("inside 2 ", time.time()-t_in) + if num_col_classifier == 1 or num_col_classifier == 2: + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + self.logger.debug("resized to %dx%d for %d cols", + img_resized.shape[1], img_resized.shape[0], num_col_classifier) + prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( + True, img_resized, self.models["region_1_2"], n_batch_inference=1, + thresholding_for_some_classes_in_light_version=True, + threshold_art_class_layout=self.threshold_art_class_layout) + else: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1])) + prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept( + False, self.image_page_org_size, self.models["region_1_2"], n_batch_inference=1, + thresholding_for_artificial_class_in_light_version=True, + threshold_art_class_layout=self.threshold_art_class_layout) + ys = slice(*self.page_coord[0:2]) + xs = slice(*self.page_coord[2:4]) + prediction_regions_org[ys, xs] = prediction_regions_page + 
confidence_matrix[ys, xs] = confidence_matrix_page + + else: + new_h = (900+ (num_col_classifier-3)*100) + img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) + self.logger.debug("resized to %dx%d (new_h=%d) for %d cols", + img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) + prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( + True, img_resized, self.models["region_1_2"], n_batch_inference=2, + thresholding_for_some_classes_in_light_version=True, + threshold_art_class_layout=self.threshold_art_class_layout) + ###prediction_regions_org = self.do_prediction(True, img_bin, self.models["region"], + ###n_batch_inference=3, + ###thresholding_for_some_classes_in_light_version=True) + #print("inside 3 ", time.time()-t_in) + #plt.imshow(prediction_regions_org[:,:,0]) + #plt.show() + + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) + confidence_matrix = resize_image(confidence_matrix, img_height_h, img_width_h ) + img_bin = resize_image(img_bin, img_height_h, img_width_h ) + prediction_regions_org=prediction_regions_org[:,:,0] + + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + mask_texts_only = mask_texts_only.astype('uint8') + + ##if num_col_classifier == 1 or num_col_classifier == 2: + ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + + mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) + mask_images_only=(prediction_regions_org[:,:] ==2)*1 + + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + test_khat = np.zeros(prediction_regions_org.shape) + test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1)) + + #plt.imshow(test_khat[:,:]) + #plt.show() + #for jv in range(1): + #print(jv, hir_seplines[0][232][3]) + #test_khat = np.zeros(prediction_regions_org.shape) + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_seplines[232]], color=(1,1,1)) + #plt.imshow(test_khat[:,:]) + #plt.show() + + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) + + test_khat = np.zeros(prediction_regions_org.shape) + test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1)) + + #plt.imshow(test_khat[:,:]) + #plt.show() + #sys.exit() + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) + + text_regions_p_true = np.zeros(prediction_regions_org.shape) + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3)) + + text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) + + textline_mask_tot_ea[(text_regions_p_true==0) | (text_regions_p_true==4) ] = 0 + #plt.imshow(textline_mask_tot_ea) + #plt.show() + #print("inside 4 ", time.time()-t_in) + self.logger.debug("exit get_regions_light_v") + return (text_regions_p_true, + erosion_hurts, + polygons_seplines, + polygons_of_only_texts, + textline_mask_tot_ea, + img_bin, + confidence_matrix) def get_regions_from_xy_2models(self,img,is_image_enhanced, 
num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") @@ -2306,7 +2350,7 @@ class Eynollah: ratio_x=1 img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - prediction_regions_org_y = self.do_prediction(True, img, self.model_region) + prediction_regions_org_y = self.do_prediction(True, img, self.models["region"]) prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h ) #plt.imshow(prediction_regions_org_y[:,:,0]) @@ -2321,7 +2365,7 @@ class Eynollah: _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) - prediction_regions_org = self.do_prediction(True, img, self.model_region) + prediction_regions_org = self.do_prediction(True, img, self.models["region"]) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] @@ -2329,7 +2373,7 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) - prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) + prediction_regions_org2 = self.do_prediction(True, img, self.models["region_p2"], marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) @@ -2353,7 +2397,7 @@ class Eynollah: if self.input_binary: prediction_bin = np.copy(img_org) else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_org, self.models["binarization"], n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin = 255 * (prediction_bin[:,:,0]==0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) @@ -2363,7 +2407,7 @@ class Eynollah: img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - prediction_regions_org = self.do_prediction(True, img, self.model_region) + prediction_regions_org = self.do_prediction(True, img, self.models["region"]) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] @@ -2372,9 +2416,9 @@ class Eynollah: mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -2386,11 +2430,11 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) self.logger.debug("exit get_regions_from_xy_2models") - return text_regions_p_true, erosion_hurts, polygons_lines_xml + return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_only_texts 
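# A minimal sketch of the relative-area filtering that the hunks above keep
# calling (filter_contours_area_of_image, now passed dilate=1). This is an
# illustration only, not eynollah's actual helper: the real one also walks
# the contour hierarchy and widens the kept polygons per the new dilate step.
import cv2

def filter_by_relative_area(mask, contours, min_area=0.00001, max_area=1.0):
    # keep contours whose area, as a fraction of the whole mask, is in range
    total = float(mask.shape[0] * mask.shape[1])
    return [cnt for cnt in contours
            if min_area <= cv2.contourArea(cnt) / total <= max_area]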
except: if self.input_binary: prediction_bin = np.copy(img_org) - prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_org, self.models["binarization"], n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin = 255 * (prediction_bin[:,:,0]==0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) @@ -2401,29 +2445,25 @@ class Eynollah: img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - prediction_regions_org = self.do_prediction(True, img, self.model_region) + prediction_regions_org = self.do_prediction(True, img, self.models["region"]) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] #mask_lines_only=(prediction_regions_org[:,:]==3)*1 #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) - #prediction_regions_org = self.do_prediction(True, img, self.model_region) - + #prediction_regions_org = self.do_prediction(True, img, self.models["region"]) #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - #prediction_regions_org = prediction_regions_org[:,:,0] - #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 - mask_lines_only = (prediction_regions_org == 3)*1 mask_texts_only = (prediction_regions_org == 1)*1 mask_images_only= (prediction_regions_org == 2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -2436,378 +2476,141 @@ class Eynollah: erosion_hurts = True self.logger.debug("exit get_regions_from_xy_2models") - return text_regions_p_true, erosion_hurts, polygons_lines_xml + return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_only_texts - def do_order_of_regions_full_layout( + def do_order_of_regions( self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): - self.logger.debug("enter do_order_of_regions_full_layout") + self.logger.debug("enter do_order_of_regions") + contours_only_text_parent = np.array(contours_only_text_parent) + contours_only_text_parent_h = np.array(contours_only_text_parent_h) boxes = np.array(boxes, dtype=int) # to be on the safe side - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( + c_boxes = np.stack((0.5 * boxes[:, 2:4].sum(axis=1), + 0.5 * boxes[:, 0:2].sum(axis=1))) + cx_main, cy_main, mx_main, Mx_main, my_main, My_main, mxy_main = find_new_features_of_contours( contours_only_text_parent) - cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours( + cx_head, cy_head, mx_head, Mx_head, my_head, My_head, mxy_head = find_new_features_of_contours( contours_only_text_parent_h) - try: - arg_text_con = [] - for ii in range(len(cx_text_only)): + def 
match_boxes(only_centers: bool): + arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int) + for ii in range(len(contours_only_text_parent)): check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80 >= boxes[jj][0] and - x_min_text_only[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main[ii] >= boxes[jj][2] and - y_cor_x_min_main[ii] < boxes[jj][3]): - arg_text_con.append(jj) + for jj, box in enumerate(boxes): + if ((cx_main[ii] >= box[0] and + cx_main[ii] < box[1] and + cy_main[ii] >= box[2] and + cy_main[ii] < box[3]) if only_centers else + (mx_main[ii] >= box[0] and + Mx_main[ii] < box[1] and + my_main[ii] >= box[2] and + My_main[ii] < box[3])): + arg_text_con_main[ii] = jj check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + - (cy_text_only[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con.append(ind_min) - args_contours = np.array(range(len(arg_text_con))) - arg_text_con_h = [] - for ii in range(len(cx_text_only_h)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (x_min_text_only_h[ii] + 80 >= boxes[jj][0] and - x_min_text_only_h[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main_h[ii] >= boxes[jj][2] and - y_cor_x_min_main_h[ii] < boxes[jj][3]): - arg_text_con_h.append(jj) - check_if_textregion_located_in_a_box = True - break - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + - (cy_text_only_h[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con_h.append(ind_min) - args_contours_h = np.array(range(len(arg_text_con_h))) + dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_main[ii]], [cx_main[ii]]]), axis=0) + pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) & + (boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1])) + ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box)) + arg_text_con_main[ii] = ind_min + args_contours_main = np.arange(len(contours_only_text_parent)) + order_by_con_main = np.zeros_like(arg_text_con_main) - order_by_con_head = np.zeros(len(arg_text_con_h)) - order_by_con_main = np.zeros(len(arg_text_con)) + arg_text_con_head = np.zeros(len(contours_only_text_parent_h), dtype=int) + for ii in range(len(contours_only_text_parent_h)): + check_if_textregion_located_in_a_box = False + for jj, box in enumerate(boxes): + if ((cx_head[ii] >= box[0] and + cx_head[ii] < box[1] and + cy_head[ii] >= box[2] and + cy_head[ii] < box[3]) if only_centers else + (mx_head[ii] >= box[0] and + Mx_head[ii] < box[1] and + my_head[ii] >= box[2] and + My_head[ii] < box[3])): + arg_text_con_head[ii] = jj + check_if_textregion_located_in_a_box = True + break + if not check_if_textregion_located_in_a_box: + dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_head[ii]], [cx_head[ii]]]), axis=0) + pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) & + (boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1])) + ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box)) + arg_text_con_head[ii] = ind_min + args_contours_head = np.arange(len(contours_only_text_parent_h)) + order_by_con_head = np.zeros_like(arg_text_con_head) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = 
[] - for iij in range(len(boxes)): - ys = slice(*boxes[iij][2:4]) - xs = slice(*boxes[iij][0:2]) - args_contours_box = args_contours[np.array(arg_text_con) == iij] - args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] - con_inter_box = [] - con_inter_box_h = [] + for iij, box in enumerate(boxes): + ys = slice(*box[2:4]) + xs = slice(*box[0:2]) + args_contours_box_main = args_contours_main[arg_text_con_main == iij] + args_contours_box_head = args_contours_head[arg_text_con_head == iij] + con_inter_box = contours_only_text_parent[args_contours_box_main] + con_inter_box_h = contours_only_text_parent_h[args_contours_box_head] - for box in args_contours_box: - con_inter_box.append(contours_only_text_parent[box]) - - for box in args_contours_box_h: - con_inter_box_h.append(contours_only_text_parent_h[box]) - - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( - textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2]) order_of_texts, id_of_texts = order_and_id_of_texts( con_inter_box, con_inter_box_h, - matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) - indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] - indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] + indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1] + indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1] + indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2] + indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2] - for zahler, _ in enumerate(args_contours_box): + for zahler, _ in enumerate(args_contours_box_main): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box_main[indexes_by_type_main[zahler]]] = \ + np.flatnonzero(indexes_sorted == arg_order_v) + ref_point - for zahler, _ in enumerate(args_contours_box_h): + for zahler, _ in enumerate(args_contours_box_head): arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_head[args_contours_box_head[indexes_by_type_head[zahler]]] = \ + np.flatnonzero(indexes_sorted == arg_order_v) + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point += len(id_of_texts) - order_of_texts_tot = [] - for tj1 in range(len(contours_only_text_parent)): - order_of_texts_tot.append(int(order_by_con_main[tj1])) - - for tj1 in range(len(contours_only_text_parent_h)): - order_of_texts_tot.append(int(order_by_con_head[tj1])) - - order_text_new = [] - for iii in range(len(order_of_texts_tot)): - order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - - except Exception as why: - self.logger.error(why) - arg_text_con = [] - for ii in range(len(cx_text_only)): - 
check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (cx_text_only[ii] >= boxes[jj][0] and - cx_text_only[ii] < boxes[jj][1] and - cy_text_only[ii] >= boxes[jj][2] and - cy_text_only[ii] < boxes[jj][3]): - # this is valid if the center of region identify in which box it is located - arg_text_con.append(jj) - check_if_textregion_located_in_a_box = True - break - - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + - (cy_text_only[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con.append(ind_min) - args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) - - ############################# head - - arg_text_con_h = [] - for ii in range(len(cx_text_only_h)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (cx_text_only_h[ii] >= boxes[jj][0] and - cx_text_only_h[ii] < boxes[jj][1] and - cy_text_only_h[ii] >= boxes[jj][2] and - cy_text_only_h[ii] < boxes[jj][3]): - # this is valid if the center of region identify in which box it is located - arg_text_con_h.append(jj) - check_if_textregion_located_in_a_box = True - break - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + - (cy_text_only_h[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con_h.append(ind_min) - args_contours_h = np.array(range(len(arg_text_con_h))) - order_by_con_head = np.zeros(len(arg_text_con_h)) - - ref_point = 0 - order_of_texts_tot = [] - id_of_texts_tot = [] - for iij, _ in enumerate(boxes): - ys = slice(*boxes[iij][2:4]) - xs = slice(*boxes[iij][0:2]) - args_contours_box = args_contours[np.array(arg_text_con) == iij] - args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] - con_inter_box = [] - con_inter_box_h = [] - - for box in args_contours_box: - con_inter_box.append(contours_only_text_parent[box]) - - for box in args_contours_box_h: - con_inter_box_h.append(contours_only_text_parent_h[box]) - - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( - textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - - order_of_texts, id_of_texts = order_and_id_of_texts( - con_inter_box, con_inter_box_h, - matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) - - indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] - indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] - - for zahler, _ in enumerate(args_contours_box): - arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point - - for zahler, _ in enumerate(args_contours_box_h): - arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point - - for jji, _ in enumerate(id_of_texts): - order_of_texts_tot.append(order_of_texts[jji] + ref_point) - id_of_texts_tot.append(id_of_texts[jji]) - ref_point += len(id_of_texts) - - 
order_of_texts_tot = [] - for tj1 in range(len(contours_only_text_parent)): - order_of_texts_tot.append(int(order_by_con_main[tj1])) - - for tj1 in range(len(contours_only_text_parent_h)): - order_of_texts_tot.append(int(order_by_con_head[tj1])) - - order_text_new = [] - for iii in range(len(order_of_texts_tot)): - order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - - self.logger.debug("exit do_order_of_regions_full_layout") - return order_text_new, id_of_texts_tot - - def do_order_of_regions_no_full_layout( - self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): - - self.logger.debug("enter do_order_of_regions_no_full_layout") - boxes = np.array(boxes, dtype=int) # to be on the safe side - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( - contours_only_text_parent) + order_of_texts_tot = np.concatenate((order_by_con_main, + order_by_con_head)) + order_text_new = np.argsort(order_of_texts_tot) + return order_text_new, id_of_texts_tot try: - arg_text_con = [] - for ii in range(len(cx_text_only)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80 >= boxes[jj][0] and - x_min_text_only[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main[ii] >= boxes[jj][2] and - y_cor_x_min_main[ii] < boxes[jj][3]): - arg_text_con.append(jj) - check_if_textregion_located_in_a_box = True - break - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + - (cy_text_only[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con.append(ind_min) - args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) - - ref_point = 0 - order_of_texts_tot = [] - id_of_texts_tot = [] - for iij in range(len(boxes)): - ys = slice(*boxes[iij][2:4]) - xs = slice(*boxes[iij][0:2]) - args_contours_box = args_contours[np.array(arg_text_con) == iij] - con_inter_box = [] - con_inter_box_h = [] - for i in range(len(args_contours_box)): - con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( - textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - - order_of_texts, id_of_texts = order_and_id_of_texts( - con_inter_box, con_inter_box_h, - matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) - - indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] - - for zahler, _ in enumerate(args_contours_box): - arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point - - for jji, _ in enumerate(id_of_texts): - order_of_texts_tot.append(order_of_texts[jji] + ref_point) - id_of_texts_tot.append(id_of_texts[jji]) - ref_point += len(id_of_texts) - - order_of_texts_tot = [] - for tj1 in range(len(contours_only_text_parent)): - order_of_texts_tot.append(int(order_by_con_main[tj1])) - - order_text_new = [] - for iii in range(len(order_of_texts_tot)): - order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + results = match_boxes(False) except Exception as why: self.logger.error(why) - arg_text_con = [] - 
for ii in range(len(cx_text_only)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (cx_text_only[ii] >= boxes[jj][0] and - cx_text_only[ii] < boxes[jj][1] and - cy_text_only[ii] >= boxes[jj][2] and - cy_text_only[ii] < boxes[jj][3]): - # this is valid if the center of region identify in which box it is located - arg_text_con.append(jj) - check_if_textregion_located_in_a_box = True - break - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + - (cy_text_only[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con.append(ind_min) - args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) + results = match_boxes(True) - ref_point = 0 - order_of_texts_tot = [] - id_of_texts_tot = [] - for iij in range(len(boxes)): - ys = slice(*boxes[iij][2:4]) - xs = slice(*boxes[iij][0:2]) - args_contours_box = args_contours[np.array(arg_text_con) == iij] - con_inter_box = [] - con_inter_box_h = [] - for i in range(len(args_contours_box)): - con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( - textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - - order_of_texts, id_of_texts = order_and_id_of_texts( - con_inter_box, con_inter_box_h, - matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) - - indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] - - for zahler, _ in enumerate(args_contours_box): - arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point - - for jji, _ in enumerate(id_of_texts): - order_of_texts_tot.append(order_of_texts[jji] + ref_point) - id_of_texts_tot.append(id_of_texts[jji]) - ref_point += len(id_of_texts) - - order_of_texts_tot = [] - - for tj1 in range(len(contours_only_text_parent)): - order_of_texts_tot.append(int(order_by_con_main[tj1])) - - order_text_new = [] - for iii in range(len(order_of_texts_tot)): - order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - - self.logger.debug("exit do_order_of_regions_no_full_layout") - return order_text_new, id_of_texts_tot + self.logger.debug("exit do_order_of_regions") + return results def check_iou_of_bounding_box_and_contour_for_tables( self, layout, table_prediction_early, pixel_table, num_col_classifier): layout_org = np.copy(layout) - layout_org[:,:,0][layout_org[:,:,0]==pixel_table] = 0 - layout = (layout[:,:,0]==pixel_table)*1 - - layout =np.repeat(layout[:, :, np.newaxis], 3, axis=2) - layout = layout.astype(np.uint8) - imgray = cv2.cvtColor(layout, cv2.COLOR_BGR2GRAY ) - _, thresh = cv2.threshold(imgray, 0, 255, 0) + layout_org[layout_org == pixel_table] = 0 + layout = (layout == pixel_table).astype(np.uint8) * 1 + _, thresh = cv2.threshold(layout, 0, 255, 0) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cnt_size = np.array([cv2.contourArea(contours[j]) - for j in range(len(contours))]) + cnt_size = np.array([cv2.contourArea(cnt) for cnt in contours]) contours_new = [] - for i in range(len(contours)): - x, y, w, h = cv2.boundingRect(contours[i]) + for i, contour in 
enumerate(contours): + x, y, w, h = cv2.boundingRect(contour) iou = cnt_size[i] /float(w*h) *100 if iou<80: - layout_contour = np.zeros((layout_org.shape[0], layout_org.shape[1])) - layout_contour= cv2.fillPoly(layout_contour,pts=[contours[i]] ,color=(1,1,1)) + layout_contour = np.zeros(layout_org.shape[:2]) + layout_contour = cv2.fillPoly(layout_contour, pts=[contour] ,color=1) layout_contour_sum = layout_contour.sum(axis=0) layout_contour_sum_diff = np.diff(layout_contour_sum) @@ -2823,44 +2626,42 @@ class Eynollah: layout_contour=cv2.erode(layout_contour[:,:], KERNEL, iterations=5) layout_contour=cv2.dilate(layout_contour[:,:], KERNEL, iterations=5) - layout_contour =np.repeat(layout_contour[:, :, np.newaxis], 3, axis=2) layout_contour = layout_contour.astype(np.uint8) - - imgray = cv2.cvtColor(layout_contour, cv2.COLOR_BGR2GRAY ) - _, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(layout_contour, 0, 255, 0) contours_sep, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) for ji in range(len(contours_sep) ): contours_new.append(contours_sep[ji]) if num_col_classifier>=2: - only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], color=(1,1,1)) + only_recent_contour_image = np.zeros(layout.shape[:2]) + only_recent_contour_image = cv2.fillPoly(only_recent_contour_image, + pts=[contours_sep[ji]], color=1) table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in_in1') if iou_in>30: - layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) + layout_org = cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=pixel_table) else: pass else: - layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) + layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=pixel_table) else: - contours_new.append(contours[i]) + contours_new.append(contour) if num_col_classifier>=2: - only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours[i]] ,color=(1,1,1)) + only_recent_contour_image = np.zeros(layout.shape[:2]) + only_recent_contour_image = cv2.fillPoly(only_recent_contour_image, pts=[contour],color=1) table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. 
* table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in') if iou_in>30: - layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) + layout_org = cv2.fillPoly(layout_org, pts=[contour], color=pixel_table) else: pass else: - layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) + layout_org = cv2.fillPoly(layout_org, pts=[contour], color=pixel_table) return layout_org, contours_new @@ -2907,58 +2708,52 @@ class Eynollah: pass boxes = np.array(boxes, dtype=int) # to be on the safe side - img_comm_e = np.zeros(image_revised_1.shape) - img_comm = np.repeat(img_comm_e[:, :, np.newaxis], 3, axis=2) - + img_comm = np.zeros(image_revised_1.shape, dtype=np.uint8) for indiv in np.unique(image_revised_1): - image_col=(image_revised_1==indiv)*255 - img_comm_in=np.repeat(image_col[:, :, np.newaxis], 3, axis=2) - img_comm_in=img_comm_in.astype(np.uint8) - - imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + image_col = (image_revised_1 == indiv).astype(np.uint8) * 255 + _, thresh = cv2.threshold(image_col, 0, 255, 0) contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) if indiv==pixel_table: - main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = 0.001) + main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, + max_area=1, min_area=0.001) else: - main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = min_area) + main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, + max_area=1, min_area=min_area) - img_comm = cv2.fillPoly(img_comm, pts = main_contours, color = (indiv, indiv, indiv)) - img_comm = img_comm.astype(np.uint8) + img_comm = cv2.fillPoly(img_comm, pts=main_contours, color=indiv) if not self.isNaN(slope_mean_hor): - image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1],3)) + image_revised_last = np.zeros(image_regions_eraly_p.shape[:2]) for i in range(len(boxes)): box_ys = slice(*boxes[i][2:4]) box_xs = slice(*boxes[i][0:2]) image_box = img_comm[box_ys, box_xs] try: - image_box_tabels_1=(image_box[:,:,0]==pixel_table)*1 + image_box_tabels_1 = (image_box == pixel_table) * 1 contours_tab,_=return_contours_of_image(image_box_tabels_1) contours_tab=filter_contours_area_of_image_tables(image_box_tabels_1,contours_tab,_,1,0.003) - image_box_tabels_1=(image_box[:,:,0]==pixel_line)*1 + image_box_tabels_1 = (image_box == pixel_line).astype(np.uint8) * 1 + image_box_tabels_and_m_text = ( (image_box == pixel_table) | + (image_box == 1) ).astype(np.uint8) * 1 - image_box_tabels_and_m_text=( (image_box[:,:,0]==pixel_table) | (image_box[:,:,0]==1) )*1 - image_box_tabels_and_m_text=image_box_tabels_and_m_text.astype(np.uint8) + image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5) - image_box_tabels_1=image_box_tabels_1.astype(np.uint8) - image_box_tabels_1 = cv2.dilate(image_box_tabels_1,KERNEL,iterations = 5) - - contours_table_m_text,_=return_contours_of_image(image_box_tabels_and_m_text) - image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) - - image_box_tabels=image_box_tabels.astype(np.uint8) - imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - 
contours_line,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
+                    contours_table_m_text, _ = return_contours_of_image(image_box_tabels_and_m_text)
+                    _, thresh = cv2.threshold(image_box_tabels_1, 0, 255, 0)
+                    contours_line, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
 
                     y_min_main_line ,y_max_main_line=find_features_of_contours(contours_line)
                     y_min_main_tab ,y_max_main_tab=find_features_of_contours(contours_tab)
-                    cx_tab_m_text,cy_tab_m_text ,x_min_tab_m_text , x_max_tab_m_text, y_min_tab_m_text ,y_max_tab_m_text, _= find_new_features_of_contours(contours_table_m_text)
-                    cx_tabl,cy_tabl ,x_min_tabl , x_max_tabl, y_min_tabl ,y_max_tabl,_= find_new_features_of_contours(contours_tab)
+                    (cx_tab_m_text, cy_tab_m_text,
+                     x_min_tab_m_text, x_max_tab_m_text,
+                     y_min_tab_m_text, y_max_tab_m_text,
+                     _) = find_new_features_of_contours(contours_table_m_text)
+                    (cx_tabl, cy_tabl,
+                     x_min_tabl, x_max_tabl,
+                     y_min_tabl, y_max_tabl,
+                     _) = find_new_features_of_contours(contours_tab)
 
                     if len(y_min_main_tab )>0:
                         y_down_tabs=[]
@@ -2968,22 +2763,30 @@ class Eynollah:
                             y_down_tab=[]
                             y_up_tab=[]
                             for i_l in range(len(y_min_main_line)):
-                                if y_min_main_tab[i_t]>y_min_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l] and y_min_main_tab[i_t]>y_max_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l]:
+                                if (y_min_main_tab[i_t] > y_min_main_line[i_l] and
+                                    y_max_main_tab[i_t] > y_min_main_line[i_l] and
+                                    y_min_main_tab[i_t] > y_max_main_line[i_l] and
+                                    y_max_main_tab[i_t] > y_min_main_line[i_l]):
                                     pass
-                                elif y_min_main_tab[i_t]<y_max_main_line[i_l] and y_max_main_tab[i_t]<y_max_main_line[i_l] and y_max_main_tab[i_t]<y_min_main_line[i_l] and y_min_main_tab[i_t]<y_min_main_line[i_l]:
+                                elif (y_min_main_tab[i_t] < y_max_main_line[i_l] and
+                                      y_max_main_tab[i_t] < y_max_main_line[i_l] and
+                                      y_max_main_tab[i_t] < y_min_main_line[i_l] and
+                                      y_min_main_tab[i_t] < y_min_main_line[i_l]):
                                     pass
                                 else:
                                     y_up_tab.append(y_min_main_line[i_l])
                                     y_down_tab.append(y_max_main_line[i_l])
 
                             if len(y_up_tab)==0:
                                 y_up_tabs.append(y_min_main_tab[i_t])
                                 y_down_tabs.append(y_max_main_tab[i_t])
                             else:
                                 y_up_tabs.append(np.min(y_up_tab))
                                 y_down_tabs.append(np.max(y_down_tab))
                     else:
                         y_down_tabs=[]
                         y_up_tabs=[]
 
                     for ii in range(len(y_up_tabs)):
                         image_box[y_up_tabs[ii]:y_down_tabs[ii]] = pixel_table
                     image_revised_last[box_ys, box_xs] = image_box
                 except:
                     image_revised_last[box_ys, box_xs] = image_box
         else:
             for i in range(len(boxes)):
                 box_ys = slice(*boxes[i][2:4])
                 box_xs = slice(*boxes[i][0:2])
                 image_revised_last[box_ys, box_xs] = img_comm[box_ys, box_xs]
 
         if num_col_classifier==1:
             img_tables_col_1 = (image_revised_last == pixel_table).astype(np.uint8)
             contours_table_col1, _ = return_contours_of_image(img_tables_col_1)
             _, _, _, _, y_min_tab_col1, y_max_tab_col1, _ = find_new_features_of_contours(contours_table_col1)
 
             if len(y_min_tab_col1)>0:
                 for ijv in range(len(y_min_tab_col1)):
-                    image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]),:,:]=pixel_table
+                    image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv])] = pixel_table
 
         return image_revised_last
 
-    def do_order_of_regions(self, *args, **kwargs):
-        if self.full_layout:
-            return self.do_order_of_regions_full_layout(*args, **kwargs)
-        return self.do_order_of_regions_no_full_layout(*args, **kwargs)
-
     def get_tables_from_model(self, img, num_col_classifier):
         img_org = np.copy(img)
         img_height_h = img_org.shape[0]
         img_width_h = img_org.shape[1]
         patches = False
         if self.light_version:
-            prediction_table, _ = self.do_prediction_new_concept(patches, img, self.model_table)
+            prediction_table, _ = self.do_prediction_new_concept(patches, img, self.models["table"])
             prediction_table = prediction_table.astype(np.int16)
             return prediction_table[:,:,0]
         else:
             if num_col_classifier < 4 and num_col_classifier > 2:
-                prediction_table = self.do_prediction(patches, img, self.model_table)
-                pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table)
+                prediction_table = self.do_prediction(patches, img, self.models["table"])
+                pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.models["table"])
                 pre_updown = cv2.flip(pre_updown, -1)
 
                 prediction_table[:,:,0][pre_updown[:,:,0]==1]=1
@@ -3050,8 +2848,8 @@ class Eynollah:
                 xs = slice(w_start, w_start + img.shape[1])
                 img_new[ys, xs] = img
 
-                prediction_ext = self.do_prediction(patches, img_new, self.model_table)
-                pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table)
+                prediction_ext = self.do_prediction(patches, img_new, self.models["table"])
+                pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.models["table"])
                 pre_updown = cv2.flip(pre_updown, -1)
 
                 prediction_table = prediction_ext[ys, xs]
@@ -3072,8 +2870,8 @@ class Eynollah:
                 xs = slice(w_start, w_start + img.shape[1])
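# A minimal sketch of the flip-ensembling used throughout get_tables_from_model
# here: predict on the image and on a 180°-rotated copy (cv2.flip with
# flipCode=-1 flips both axes), un-flip the second prediction, and take the
# union of the table pixels. `predict_fn` is a hypothetical stand-in for
# self.do_prediction(...), not eynollah's API.
import cv2

def flip_ensemble_table_mask(img, predict_fn):
    pred = predict_fn(img)                        # H x W x C label map
    pred_updown = predict_fn(cv2.flip(img, -1))   # same model, rotated input
    pred_updown = cv2.flip(pred_updown, -1)       # rotate prediction back
    pred[:, :, 0][pred_updown[:, :, 0] == 1] = 1  # union of table pixels
    return pred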
img_new[ys, xs] = img - prediction_ext = self.do_prediction(patches, img_new, self.model_table) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) + prediction_ext = self.do_prediction(patches, img_new, self.models["table"]) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.models["table"]) pre_updown = cv2.flip(pre_updown, -1) prediction_table = prediction_ext[ys, xs] @@ -3085,10 +2883,10 @@ class Eynollah: prediction_table = np.zeros(img.shape) img_w_half = img.shape[1] // 2 - pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_table) - pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_table) - pre_full = self.do_prediction(patches, img[:,:,:], self.model_table) - pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table) + pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.models["table"]) + pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.models["table"]) + pre_full = self.do_prediction(patches, img[:,:,:], self.models["table"]) + pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.models["table"]) pre_updown = cv2.flip(pre_updown, -1) prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4) @@ -3174,26 +2972,9 @@ class Eynollah: num_col = num_col + 1 if not num_column_is_classified: num_col_classifier = num_col + 1 - if self.num_col_upper and self.num_col_lower: - if self.num_col_upper == self.num_col_lower: - num_col_classifier = self.num_col_upper - else: - if num_col_classifier < self.num_col_lower: - num_col_classifier = self.num_col_lower - if num_col_classifier > self.num_col_upper: - num_col_classifier = self.num_col_upper - - elif self.num_col_lower and not self.num_col_upper: - if num_col_classifier < self.num_col_lower: - num_col_classifier = self.num_col_lower - - elif self.num_col_upper and not self.num_col_lower: - if num_col_classifier > self.num_col_upper: - num_col_classifier = self.num_col_upper - - else: - pass - + num_col_classifier = min(self.num_col_upper or num_col_classifier, + max(self.num_col_lower or num_col_classifier, + num_col_classifier)) except Exception as why: self.logger.error(why) num_col = None @@ -3289,7 +3070,8 @@ class Eynollah: else: self.get_image_and_scales(img_org, img_res, scale) if self.allow_scaling: - img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) + img_org, img_res, is_image_enhanced = \ + self.resize_image_with_column_classifier(is_image_enhanced, img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified @@ -3298,7 +3080,10 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) + textline_mask_tot_ea, _ = self.textline_contours(image_page, True, + scaler_h_textline, + scaler_w_textline, + num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3308,20 +3093,18 @@ class Eynollah: def run_deskew(self, textline_mask_tot_ea): #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') - slope_deskew = return_deskew_slop_old_mp(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, - logger=self.logger, 
plotter=self.plotter) - slope_first = 0 - + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, + map=self.executor.map, logger=self.logger, plotter=self.plotter) if self.plotter: self.plotter.save_deskewed_image(slope_deskew) self.logger.info("slope_deskew: %.2f°", slope_deskew) - return slope_deskew, slope_first + return slope_deskew def run_marginals( - self, image_page, textline_mask_tot_ea, mask_images, mask_lines, + self, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): - image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] + textline_mask_tot = textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 text_regions_p_1[mask_lines[:, :] == 1] = 3 @@ -3339,10 +3122,7 @@ class Eynollah: except Exception as e: self.logger.error("exception %s", e) - if self.plotter: - self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) - self.plotter.save_plot_of_layout_main(text_regions_p, image_page) - return textline_mask_tot, text_regions_p, image_page_rotated + return textline_mask_tot, text_regions_p def run_boxes_no_full_layout( self, image_page, textline_mask_tot, text_regions_p, @@ -3359,7 +3139,9 @@ class Eynollah: regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 - regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators = (text_regions_p[:, :] == 1) * 1 + # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 + #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) #print(time.time()-t_0_box,'time box in 1') if self.tables: regions_without_separators[table_prediction ==1 ] = 1 @@ -3370,13 +3152,11 @@ class Eynollah: pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_lines) + text_regions_p, num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_lines) + text_regions_p_1_n, num_col_classifier, self.tables, pixel_lines) #print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) @@ -3402,7 +3182,7 @@ class Eynollah: pass else: text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + text_regions_p_tables[(table_prediction == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, @@ -3423,12 +3203,13 @@ class Eynollah: pass else: text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 + text_regions_p_tables = np.round(text_regions_p_tables) + text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10 pixel_line = 
3 img_revised_tab2 = self.add_tables_heuristic_to_layout( - text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, + peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction_n, 10, num_col_classifier) @@ -3436,27 +3217,28 @@ class Eynollah: img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, + text_regions_p.shape[0], text_regions_p.shape[1]) #print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: if self.light_version: - text_regions_p[:,:][table_prediction[:,:]==1] = 10 - img_revised_tab=text_regions_p[:,:] + text_regions_p[table_prediction == 1] = 10 + img_revised_tab = text_regions_p[:,:] else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) - img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1 + img_revised_tab = np.copy(img_revised_tab2) + img_revised_tab[(text_regions_p == 1) & (img_revised_tab != 10)] = 1 else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 + img_revised_tab = np.copy(text_regions_p) + img_revised_tab[img_revised_tab == 10] = 0 + img_revised_tab[img_revised_tab2_d_rotated == 10] = 10 - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 + text_regions_p[text_regions_p == 10] = 0 + text_regions_p[img_revised_tab == 10] = 10 else: - img_revised_tab=text_regions_p[:,:] + img_revised_tab = text_regions_p[:,:] #img_revised_tab = text_regions_p[:, :] if self.light_version: polygons_of_images = return_contours_of_interested_region(text_regions_p, 2) @@ -3494,12 +3276,19 @@ class Eynollah: text_regions_p[:,:][table_prediction[:,:]==1] = 10 img_revised_tab = text_regions_p[:,:] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ - rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, + table_prediction, slope_deskew) - text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) - textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) - table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) + text_regions_p_1_n = resize_image(text_regions_p_1_n, + text_regions_p.shape[0], + text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d, + text_regions_p.shape[0], + text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n, + text_regions_p.shape[0], + text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:,:] 
== 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 @@ -3514,12 +3303,19 @@ class Eynollah: else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ - rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, + table_prediction, slope_deskew) - text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) - textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) - table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) + text_regions_p_1_n = resize_image(text_regions_p_1_n, + text_regions_p.shape[0], + text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d, + text_regions_p.shape[0], + text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n, + text_regions_p.shape[0], + text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 @@ -3536,13 +3332,11 @@ class Eynollah: pixel_lines=3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_lines) + text_regions_p, num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_lines) + text_regions_p_1_n, num_col_classifier, self.tables, pixel_lines) if num_col_classifier>=3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3574,11 +3368,12 @@ class Eynollah: num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 + text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( - text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, + peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( @@ -3587,21 +3382,22 @@ class Eynollah: img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, + text_regions_p.shape[0], + text_regions_p.shape[1]) if np.abs(slope_deskew) < 0.13: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) + img_revised_tab = np.copy(img_revised_tab2) else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - 
img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 + img_revised_tab = np.copy(text_regions_p) + img_revised_tab[img_revised_tab == 10] = 0 + img_revised_tab[img_revised_tab2_d_rotated == 10] = 10 - ##img_revised_tab=img_revised_tab2[:,:,0] - #img_revised_tab=text_regions_p[:,:] - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 - #img_revised_tab[img_revised_tab2[:,:,0]==10] =10 + ##img_revised_tab = img_revised_tab2[:,:] + #img_revised_tab = text_regions_p[:,:] + text_regions_p[text_regions_p == 10] = 0 + text_regions_p[img_revised_tab == 10] = 10 + #img_revised_tab[img_revised_tab2 == 10] = 10 pixel_img = 4 min_area_mar = 0.00001 @@ -3640,7 +3436,7 @@ class Eynollah: #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) - ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 + ##regions_fully[:, :, 0][regions_fully_only_drop[:, :] == 4] = 4 drop_capital_label_in_full_layout_model = 3 drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1 @@ -3659,7 +3455,8 @@ class Eynollah: ##else: ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) - ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) + ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, + ### regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4 @@ -3691,18 +3488,6 @@ class Eynollah: regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables) - @staticmethod - def our_load_model(model_file): - if model_file.endswith('.h5') and Path(model_file[:-3]).exists(): - # prefer SavedModel over HDF5 format if it exists - model_file = model_file[:-3] - try: - model = load_model(model_file, compile=False) - except: - model = load_model(model_file, compile=False, custom_objects={ - "PatchEncoder": PatchEncoder, "Patches": Patches}) - return model - def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p): height1 =672#448 @@ -3722,7 +3507,10 @@ class Eynollah: min_cont_size_to_be_dilated = 10 if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: - cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent) + (cx_conts, cy_conts, + x_min_conts, x_max_conts, + y_min_conts, y_max_conts, + _) = find_new_features_of_contours(contours_only_text_parent) args_cont_located = np.array(range(len(contours_only_text_parent))) diff_y_conts = np.abs(y_max_conts[:]-y_min_conts) @@ -3737,15 +3525,31 @@ class Eynollah: args_cont_located_excluded = args_cont_located[diff_x_ratio>=1.3] args_cont_located_included = args_cont_located[diff_x_ratio<1.3] - contours_only_text_parent_excluded = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]>=1.3]#contours_only_text_parent[diff_x_ratio>=1.3] - contours_only_text_parent_included = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]<1.3]#contours_only_text_parent[diff_x_ratio<1.3] + contours_only_text_parent_excluded = [contours_only_text_parent[ind] + 
#contours_only_text_parent[diff_x_ratio>=1.3] + for ind in range(len(contours_only_text_parent)) + if diff_x_ratio[ind]>=1.3] + contours_only_text_parent_included = [contours_only_text_parent[ind] + #contours_only_text_parent[diff_x_ratio<1.3] + for ind in range(len(contours_only_text_parent)) + if diff_x_ratio[ind]<1.3] - - cx_conts_excluded = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]>=1.3]#cx_conts[diff_x_ratio>=1.3] - cx_conts_included = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]<1.3]#cx_conts[diff_x_ratio<1.3] - - cy_conts_excluded = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]>=1.3]#cy_conts[diff_x_ratio>=1.3] - cy_conts_included = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]<1.3]#cy_conts[diff_x_ratio<1.3] + cx_conts_excluded = [cx_conts[ind] + #cx_conts[diff_x_ratio>=1.3] + for ind in range(len(cx_conts)) + if diff_x_ratio[ind]>=1.3] + cx_conts_included = [cx_conts[ind] + #cx_conts[diff_x_ratio<1.3] + for ind in range(len(cx_conts)) + if diff_x_ratio[ind]<1.3] + cy_conts_excluded = [cy_conts[ind] + #cy_conts[diff_x_ratio>=1.3] + for ind in range(len(cy_conts)) + if diff_x_ratio[ind]>=1.3] + cy_conts_included = [cy_conts[ind] + #cy_conts[diff_x_ratio<1.3] + for ind in range(len(cy_conts)) + if diff_x_ratio[ind]<1.3] #print(diff_x_ratio, 'ratio') text_regions_p = text_regions_p.astype('uint8') @@ -3767,7 +3571,10 @@ class Eynollah: contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated) contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated) - indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = self.return_indexes_of_contours_loctaed_inside_another_list_of_contours(contours_only_dilated, contours_only_text_parent_included, cx_conts_included, cy_conts_included, args_cont_located_included) + indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = \ + self.return_indexes_of_contours_located_inside_another_list_of_contours( + contours_only_dilated, contours_only_text_parent_included, + cx_conts_included, cy_conts_included, args_cont_located_included) if len(args_cont_located_excluded)>0: @@ -3780,7 +3587,7 @@ class Eynollah: flattened_array = np.concatenate([arr.ravel() for arr in array_list]) #print(len( np.unique(flattened_array)), 'indexes_of_located_cont uniques') - missing_textregions = list( set(np.array(range(len(contours_only_text_parent))) ) - set(np.unique(flattened_array)) ) + missing_textregions = list( set(range(len(contours_only_text_parent))) - set(flattened_array) ) #print(missing_textregions, 'missing_textregions') for ind in missing_textregions: @@ -3871,7 +3678,7 @@ class Eynollah: tot_counter += 1 batch.append(j) if tot_counter % inference_bs == 0 or tot_counter == len(ij_list): - y_pr = self.model_reading_order.predict(input_1 , verbose=0) + y_pr = self.models["reading_order"].predict(input_1 , verbose=0) for jb, j in enumerate(batch): if y_pr[jb][0]>=0.5: post_list.append(j) @@ -3900,12 +3707,13 @@ class Eynollah: region_with_curr_order = ordered[ind] if region_with_curr_order < len(contours_only_dilated): if np.isscalar(indexes_of_located_cont[region_with_curr_order]): - org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]] + org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]]) else: arg_sort_located_cont = 
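# [editor's note] Sketch of the batched pairwise ordering used above: candidate pairs are
# buffered until the inference batch size is reached, then a binary model scores which
# element of each pair comes later (threshold 0.5). `model` stands in for
# self.models["reading_order"]; the pair-image encoding is elided, and the model is
# assumed to return one score per row in column 0.
import numpy as np

def classify_pairs(pair_inputs, model, batch_size=4):
    """pair_inputs: list of (index, image) tuples; returns indices voted 'after'."""
    after = []
    for start in range(0, len(pair_inputs), batch_size):
        batch = pair_inputs[start:start + batch_size]
        y = model.predict(np.stack([x for _, x in batch]), verbose=0)
        after.extend(j for (j, _), score in zip(batch, y[:, 0]) if score >= 0.5)
    return after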
np.argsort(center_y_coordinates_of_located[region_with_curr_order]) - org_contours_indexes = org_contours_indexes + list(np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) ##org_contours_indexes + list ( + org_contours_indexes.extend( + np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) else: - org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]] + org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]]) region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] return org_contours_indexes, region_ids @@ -3913,8 +3721,185 @@ class Eynollah: region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] return ordered, region_ids + def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.2*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. + common_window ) + + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>70: + print(len(peaks_real), 'len(peaks_real)') + peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)] + + arg_sort = np.argsort(sum_smoothed[peaks_real]) + arg_sort4 =arg_sort[::-1][:4] + peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + + argsort_sorted = np.argsort(peaks_sort_4) + first_4_sorted = peaks_sort_4[argsort_sorted] + y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] + #print(first_4_sorted,'first_4_sorted') + + arg_sortnew = np.argsort(y_4_sorted) + peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] ) + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1]) + #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peaks_final[0], peaks_final[1] + else: + pass + + def return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + self, peaks_real, sum_smoothed, start_split, end_split): + + peaks_real = peaks_real[(peaks_real<end_split) & (peaks_real>start_split)] + + arg_sort = np.argsort(sum_smoothed[peaks_real]) + arg_sort4 =arg_sort[::-1][:4] + peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + argsort_sorted = np.argsort(peaks_sort_4) + + first_4_sorted = peaks_sort_4[argsort_sorted] + y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] + #print(first_4_sorted,'first_4_sorted') + + arg_sortnew = np.argsort(y_4_sorted) + peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) + return peaks_final[0] + + def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.15*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. + common_window ) + mid = int(width/2.)
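# [editor's note] The return_start_and_end_of_common_text_of_textline_ocr* helpers above
# share one idea: sum pixel values per column, smooth the profile, and pick strong peaks
# (likely inter-word gaps on a light background) inside a window around the middle of the
# textline, as safe cut points before OCR. A self-contained sketch; the function name and
# defaults are illustrative:
import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

def central_split_candidates(textline, window_ratio=0.2):
    """textline: (H, W, C) image array -> candidate split columns near the centre."""
    width = textline.shape[1]
    lo = int(width / 2. - window_ratio * width)
    hi = int(width / 2. + window_ratio * width)
    profile = gaussian_filter1d(textline[:, :, 0].sum(axis=0), 3)
    peaks, _ = find_peaks(profile, height=0)
    return peaks[(peaks > lo) & (peaks < hi)]  # same masking as reconstructed above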
+ + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + if len(peaks_real)>70: + peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + peaks_real, sum_smoothed, width1, mid+2) + peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + peaks_real, sum_smoothed, mid-2, width2) + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peak_start, peak_start], [0, height-1]) + #plt.plot([peak_end, peak_end], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peak_start, peak_end + else: + pass + + def return_ocr_of_textline_without_common_section( + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + #width1 = int ( width/2. - common_window ) + #width2 = int ( width/2. + common_window ) + + split_point = return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image) + if split_point: + image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + + #pixel_values1 = processor(image1, return_tensors="pt").pixel_values + #pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values + generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) + generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + #print(generated_text_merged,'generated_text_merged') + + #generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + #generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + + #generated_text = generated_text1 + ' ' + generated_text2 + generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] + + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + else: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + #print(generated_text,'generated_text') + #print('########################################') + return generated_text + + def return_ocr_of_textline( + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + #width1 = int ( width/2. - common_window ) + #width2 = int ( width/2. 
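# [editor's note] The split-and-batch pattern of return_ocr_of_textline_without_common_section,
# condensed: both halves of an over-long textline go through the processor in one batch and
# the decoded halves are re-joined. Assumes a Hugging Face TrOCR-style processor/model pair
# (e.g. TrOCRProcessor / VisionEncoderDecoderModel); device handling is reduced to a .to() call.
def ocr_two_halves(textline_rgb, split_point, processor, model_ocr, device):
    left = textline_rgb[:, :split_point, :]
    right = textline_rgb[:, split_point:, :]
    # one forward pass for both crops instead of two
    pixel_values = processor([left, right], return_tensors="pt").pixel_values
    generated_ids = model_ocr.generate(pixel_values.to(device))
    left_text, right_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return left_text + ' ' + right_text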
+ common_window ) + + try: + width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) + + image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) + + pixel_values1 = processor(image1, return_tensors="pt").pixel_values + pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + + match = sq(None, generated_text1, generated_text2).find_longest_match( + 0, len(generated_text1), 0, len(generated_text2)) + generated_text = generated_text1 + generated_text2[match.b+match.size:] + except: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text + def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): - return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] + return list(np.array(ls_cons)[np.array(sorted_indexes)]) def return_it_in_two_groups(self, x_differential): split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 @@ -3941,351 +3926,40 @@ class Eynollah: return x_differential_new - def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): - #print(all_found_textline_polygons) - for j in range(len(all_found_textline_polygons)): - for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] - area = cv2.contourArea(con_ind) - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 0.1) - y_differential = gaussian_filter1d(y_differential, 0.1) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.12) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.12) - - if dilation_m1>8: - dilation_m1 = 8 - if dilation_m1<6: - dilation_m1 = 6 - #print(dilation_m1, 'dilation_m1') - dilation_m1 = 6 - dilation_m2 = int(dilation_m1/2.) 
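# [editor's note] The try-branch above de-duplicates the overlapping middle of the two OCR
# results with difflib (sq is SequenceMatcher); a standalone sketch of that stitch:
from difflib import SequenceMatcher

def stitch(left, right):
    match = SequenceMatcher(None, left, right).find_longest_match(
        0, len(left), 0, len(right))
    # keep only the part of `right` that was not already covered by `left`
    return left + right[match.b + match.size:]

assert stitch("the quick bro", "ick brown fox") == "the quick brown fox"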
+1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - #print(results,'results') - results[results==0] = 1 - - diff_result = np.diff(results) - - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - #indices_2 = indices_2[1:] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons - - def dilate_textregions_contours(self, all_found_textline_polygons): - #print(all_found_textline_polygons) - for j in range(len(all_found_textline_polygons)): - con_ind = all_found_textline_polygons[j] - #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') - area = cv2.contourArea(con_ind) - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 0.1) - y_differential = gaussian_filter1d(y_differential, 0.1) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= 
(x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.12) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.12) - - if dilation_m1>8: - dilation_m1 = 8 - if dilation_m1<6: - dilation_m1 = 6 - #print(dilation_m1, 'dilation_m1') - dilation_m1 = 4#6 - dilation_m2 = int(dilation_m1/2.) +1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - #print(results,'results') - results[results==0] = 1 - - diff_result = np.diff(results) - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - #indices_2 = indices_2[1:] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons - - def dilate_textline_contours(self, all_found_textline_polygons): - for j in range(len(all_found_textline_polygons)): - for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] - area = cv2.contourArea(con_ind) - - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 3) - y_differential = gaussian_filter1d(y_differential, 3) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - 
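# [editor's note] These hand-rolled point-shifting dilation routines are removed on this
# branch; per the changelog, region and line widening now goes through Polygon.buffer()
# to avoid invalid paths. A minimal sketch of that replacement for an OpenCV-style
# contour (the helper name is illustrative, and a valid input ring is assumed):
import numpy as np
from shapely.geometry import Polygon

def widen_contour(contour, dist=4.0):
    """contour: (N, 1, 2) int array -> buffered contour in the same layout."""
    poly = Polygon(contour[:, 0, :]).buffer(dist, join_style=2)  # mitred joins
    pts = np.asarray(poly.exterior.coords[:-1], dtype=np.int32)  # drop closing point
    return pts[:, np.newaxis, :]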
y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.35) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.35) - - if dilation_m1>12: - dilation_m1 = 12 - if dilation_m1<4: - dilation_m1 = 4 - #print(dilation_m1, 'dilation_m1') - dilation_m2 = int(dilation_m1/2.) +1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - results[results==0] = 1 - - diff_result = np.diff(results) - - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons - - def filter_contours_inside_a_bigger_one(self,contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"): - if type_contour=="textregion": - areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] + def filter_contours_inside_a_bigger_one(self, contours, contours_d_ordered, image, + marginal_cnts=None, type_contour="textregion"): + if type_contour == "textregion": + areas = np.array(list(map(cv2.contourArea, contours))) area_tot = image.shape[0]*image.shape[1] + areas_ratio = areas / area_tot + cx_main, cy_main = find_center_of_contours(contours) - M_main = [cv2.moments(contours[j]) - for j in 
range(len(contours))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + contours_index_small = np.flatnonzero(areas_ratio < 1e-3) + contours_index_large = np.flatnonzero(areas_ratio >= 1e-3) - areas_ratio = np.array(areas)/ area_tot - contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] - contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] - - #contours_> = [contours[ind] for ind in contours_index_big] + #contours_> = [contours[ind] for ind in contours_index_large] indexes_to_be_removed = [] for ind_small in contours_index_small: - results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) - for ind in contours_index_big] - if marginal_cnts: - results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) - for ind in range(len(marginal_cnts))] + results = [cv2.pointPolygonTest(contours[ind_large], (cx_main[ind_small], + cy_main[ind_small]), + False) + for ind_large in contours_index_large] + results = np.array(results) + if np.any(results==1): + indexes_to_be_removed.append(ind_small) + elif marginal_cnts: + results_marginal = [cv2.pointPolygonTest(marginal_cnt, + (cx_main[ind_small], + cy_main[ind_small]), + False) + for marginal_cnt in marginal_cnts] results_marginal = np.array(results_marginal) - if np.any(results_marginal==1): indexes_to_be_removed.append(ind_small) - results = np.array(results) - - if np.any(results==1): - indexes_to_be_removed.append(ind_small) - - if len(indexes_to_be_removed)>0: - indexes_to_be_removed = np.unique(indexes_to_be_removed) - indexes_to_be_removed = np.sort(indexes_to_be_removed)[::-1] - for ind in indexes_to_be_removed: - contours.pop(ind) - if len(contours_d_ordered)>0: - contours_d_ordered.pop(ind) + contours = np.delete(contours, indexes_to_be_removed, axis=0) + if len(contours_d_ordered): + contours_d_ordered = np.delete(contours_d_ordered, indexes_to_be_removed, axis=0) return contours, contours_d_ordered @@ -4293,55 +3967,47 @@ class Eynollah: contours_txtline_of_all_textregions = [] indexes_of_textline_tot = [] index_textline_inside_textregion = [] + for ind_region, textlines in enumerate(contours): + contours_txtline_of_all_textregions.extend(textlines) + index_textline_inside_textregion.extend(list(range(len(textlines)))) + indexes_of_textline_tot.extend([ind_region] * len(textlines)) - for jj in range(len(contours)): - contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] - - ind_textline_inside_tr = list(range(len(contours[jj]))) - index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr - ind_ins = [jj] * len(contours[jj]) - indexes_of_textline_tot = indexes_of_textline_tot + ind_ins - - M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) - for j in range(len(contours_txtline_of_all_textregions))] - cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - - areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] + areas_tot = np.array(list(map(cv2.contourArea, contours_txtline_of_all_textregions))) area_tot_tot = image.shape[0]*image.shape[1] + cx_main_tot, cy_main_tot = 
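# [editor's note] Core of filter_contours_inside_a_bigger_one: a small region is dropped
# when its center falls inside any sufficiently large region (or a marginal). Standalone
# sketch using the same OpenCV primitive; here >= 0 also counts boundary hits:
import cv2
import numpy as np

def center_inside_any(center, big_contours):
    cx, cy = center
    return any(cv2.pointPolygonTest(c, (float(cx), float(cy)), False) >= 0
               for c in big_contours)

square = np.array([[[0, 0]], [[10, 0]], [[10, 10]], [[0, 10]]], dtype=np.int32)
assert center_inside_any((5, 5), [square])
assert not center_inside_any((20, 20), [square])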
find_center_of_contours(contours_txtline_of_all_textregions) - textregion_index_to_del = [] - textline_in_textregion_index_to_del = [] + textline_in_textregion_index_to_del = {} for ij in range(len(contours_txtline_of_all_textregions)): - args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) - args_all.pop(ij) - - areas_without = np.array(areas_tot)[args_all] area_of_con_interest = areas_tot[ij] - - args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest] + args_without = np.delete(np.arange(len(contours_txtline_of_all_textregions)), ij) + areas_without = areas_tot[args_without] + args_with_bigger_area = args_without[areas_without > 1.5*area_of_con_interest] if len(args_with_bigger_area)>0: - results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) + results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], + (cx_main_tot[ij], + cy_main_tot[ij]), + False) for ind in args_with_bigger_area ] results = np.array(results) if np.any(results==1): #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij]) - textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) - textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) - #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij])) + textline_in_textregion_index_to_del.setdefault( + indexes_of_textline_tot[ij], list()).append( + index_textline_inside_textregion[ij]) + #contours[indexes_of_textline_tot[ij]].pop(index_textline_inside_textregion[ij]) - textregion_index_to_del = np.array(textregion_index_to_del) - textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del) - for ind_u_a_trs in np.unique(textregion_index_to_del): - textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs] - textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] - for ittrd in textline_in_textregion_index_to_del_ind: - contours[ind_u_a_trs].pop(ittrd) + for textregion_index_to_del in textline_in_textregion_index_to_del: + contours[textregion_index_to_del] = list(np.delete( + contours[textregion_index_to_del], + textline_in_textregion_index_to_del[textregion_index_to_del], + # needed so numpy does not flatten the entire result when 0 left + axis=0)) return contours - def return_indexes_of_contours_loctaed_inside_another_list_of_contours(self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc): + def return_indexes_of_contours_located_inside_another_list_of_contours( + self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc): indexes_of_located_cont = [] center_x_coordinates_of_located = [] center_y_coordinates_of_located = [] @@ -4355,7 +4021,8 @@ class Eynollah: for ind in range(len(cy_main_loc)) ] results = np.array(results) indexes_in = np.where((results == 0) | (results == 1)) - indexes = indexes_loc[indexes_in]# [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1)) + # [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1)) + indexes = indexes_loc[indexes_in] indexes_of_located_cont.append(indexes) center_x_coordinates_of_located.append(np.array(cx_main_loc)[indexes_in] ) @@ -4365,228 +4032,84 @@ class Eynollah: def filter_contours_without_textline_inside( - self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions): - 
###contours_txtline_of_all_textregions = [] - ###for jj in range(len(contours_textline)): - ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] + self, contours_par, contours_textline, + contours_only_text_parent_d_ordered, + conf_contours_textregions): - ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) - ### for j in range(len(contours_txtline_of_all_textregions))] - ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) - ### for j in range(len(M_main_textline))] - ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) - ### for j in range(len(M_main_textline))] + assert len(contours_par) == len(contours_textline) + indices = np.arange(len(contours_textline)) + indices = np.delete(indices, np.flatnonzero([len(lines) == 0 for lines in contours_textline])) + def filterfun(lis): + if len(lis) == 0: + return [] + return list(np.array(lis)[indices]) - ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))] - ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - ###cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + return (filterfun(contours_par), + filterfun(contours_textline), + filterfun(contours_only_text_parent_d_ordered), + filterfun(conf_contours_textregions), + # indices + ) - ###contours_with_textline = [] - ###for ind_tr, con_tr in enumerate(contours): - ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) - ### for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] - ###results = np.array(results) - ###if np.any(results==1): - ###contours_with_textline.append(con_tr) - - textregion_index_to_del = [] - for index_textregion, textlines_textregion in enumerate(contours_textline): - if len(textlines_textregion)==0: - textregion_index_to_del.append(index_textregion) - - uniqe_args_trs = np.unique(textregion_index_to_del) - uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] - - for ind_u_a_trs in uniqe_args_trs_sorted: - conf_contours_textregions.pop(ind_u_a_trs) - contours.pop(ind_u_a_trs) - contours_textline.pop(ind_u_a_trs) - text_con_org.pop(ind_u_a_trs) - if len(contours_only_text_parent_d_ordered) > 0: - contours_only_text_parent_d_ordered.pop(ind_u_a_trs) - - return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) - - def dilate_textlines(self, all_found_textline_polygons): - for j in range(len(all_found_textline_polygons)): - for i in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][i] - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: - x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) - mult = x_biger_than_x*x_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if y_differential[0]==0: - y_differential[0] = 0.1 - if y_differential[-1]==0: - y_differential[-1]= 0.1 - y_differential = [y_differential[ind] if y_differential[ind] != 0 - else 0.5 * (y_differential[ind-1] + 
y_differential[ind+1]) - for ind in range(len(y_differential))] - - if y_differential[0]==0.1: - y_differential[0] = y_differential[1] - if y_differential[-1]==0.1: - y_differential[-1] = y_differential[-2] - y_differential.append(y_differential[0]) - - y_differential = [-1 if y_differential[ind] < 0 else 1 - for ind in range(len(y_differential))] - y_differential = self.return_it_in_two_groups(y_differential) - y_differential = np.array(y_differential) - - con_scaled = con_ind*1 - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 - con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 - - try: - con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 - con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 - con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 - - try: - con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 - con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 - except: - pass - - else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - mult = y_biger_than_x*y_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if x_differential[0]==0: - x_differential[0] = 0.1 - if x_differential[-1]==0: - x_differential[-1]= 0.1 - x_differential = [x_differential[ind] if x_differential[ind] != 0 - else 0.5 * (x_differential[ind-1] + x_differential[ind+1]) - for ind in range(len(x_differential))] - - if x_differential[0]==0.1: - x_differential[0] = x_differential[1] - if x_differential[-1]==0.1: - x_differential[-1] = x_differential[-2] - x_differential.append(x_differential[0]) - - x_differential = [-1 if x_differential[ind] < 0 else 1 - for ind in range(len(x_differential))] - x_differential = self.return_it_in_two_groups(x_differential) - x_differential = np.array(x_differential) - - con_scaled = con_ind*1 - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 - con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 - - try: - con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 - con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 - con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 - - try: - con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 - con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 - except: - pass - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] - - return all_found_textline_polygons - - def delete_regions_without_textlines( - self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, - contours_only_text_parent, index_by_text_par_con): - - slopes_rem = [] - all_found_textline_polygons_rem = [] - boxes_text_rem = [] - txt_con_org_rem = [] - contours_only_text_parent_rem = [] - index_by_text_par_con_rem = [] - - for i, ind_con in enumerate(all_found_textline_polygons): - if len(ind_con): - all_found_textline_polygons_rem.append(ind_con) - slopes_rem.append(slopes[i]) - boxes_text_rem.append(boxes_text[i]) - txt_con_org_rem.append(txt_con_org[i]) - 
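# [editor's note] The re-indexing at the end of delete_regions_without_textlines
# (continued just below) assigns each surviving region its rank within the old reading
# order; numpy's double-argsort yields the same ranks directly. Toy values:
import numpy as np

index_by_text_par_con_rem = np.array([5, 2, 9])        # surviving old indices
ranks = np.argsort(np.argsort(index_by_text_par_con_rem))
assert ranks.tolist() == [1, 0, 2]                     # 5 is 2nd, 2 is 1st, 9 is 3rd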
contours_only_text_parent_rem.append(contours_only_text_parent[i]) - index_by_text_par_con_rem.append(index_by_text_par_con[i]) - - index_sort = np.argsort(index_by_text_par_con_rem) - indexes_new = np.array(range(len(index_by_text_par_con_rem))) - - index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] - for j in range(len(index_by_text_par_con_rem))] - - return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, - contours_only_text_parent_rem, index_by_text_par_con_rem_sort) - - def separate_marginals_to_left_and_right_and_order_from_top_to_down(self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width): - cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours( - polygons_of_marginals) - + def separate_marginals_to_left_and_right_and_order_from_top_to_down( + self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, + slopes_marginals, mid_point_of_page_width): + cx_marg, cy_marg = find_center_of_contours(polygons_of_marginals) cx_marg = np.array(cx_marg) cy_marg = np.array(cy_marg) - - poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] ) - poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] ) - - all_found_textline_polygons_marginals_left = list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] ) - all_found_textline_polygons_marginals_right = list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] ) - - all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] ) - all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] ) - - slopes_marg_left = list( np.array(slopes_marginals)[cx_marg < mid_point_of_page_width] ) - slopes_marg_right = list( np.array(slopes_marginals)[cx_marg >= mid_point_of_page_width] ) - - cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width] - cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width] - - ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), key=lambda x: x[0])] - ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), key=lambda x: x[0])] - - ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, all_found_textline_polygons_marginals_left), key=lambda x: x[0])] - ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, all_found_textline_polygons_marginals_right), key=lambda x: x[0])] - - ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, all_box_coord_marginals_left), key=lambda x: x[0])] - ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, all_box_coord_marginals_right), key=lambda x: x[0])] - - ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), key=lambda x: x[0])] - ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), key=lambda x: x[0])] - - return ordered_left_marginals, ordered_right_marginals, ordered_left_marginals_textline, ordered_right_marginals_textline, ordered_left_marginals_bbox, ordered_right_marginals_bbox, ordered_left_slopes_marginals, ordered_right_slopes_marginals + def split(lis): + array = np.array(lis) + return (list(array[cx_marg < mid_point_of_page_width]), + 
list(array[cx_marg >= mid_point_of_page_width])) + + (poly_marg_left, + poly_marg_right) = \ + split(polygons_of_marginals) + + (all_found_textline_polygons_marginals_left, + all_found_textline_polygons_marginals_right) = \ + split(all_found_textline_polygons_marginals) + + (all_box_coord_marginals_left, + all_box_coord_marginals_right) = \ + split(all_box_coord_marginals) + + (slopes_marg_left, + slopes_marg_right) = \ + split(slopes_marginals) + + (cy_marg_left, + cy_marg_right) = \ + split(cy_marg) + + order_left = np.argsort(cy_marg_left) + order_right = np.argsort(cy_marg_right) + def sort_left(lis): + return list(np.array(lis)[order_left]) + def sort_right(lis): + return list(np.array(lis)[order_right]) + + ordered_left_marginals = sort_left(poly_marg_left) + ordered_right_marginals = sort_right(poly_marg_right) + + ordered_left_marginals_textline = sort_left(all_found_textline_polygons_marginals_left) + ordered_right_marginals_textline = sort_right(all_found_textline_polygons_marginals_right) + + ordered_left_marginals_bbox = sort_left(all_box_coord_marginals_left) + ordered_right_marginals_bbox = sort_right(all_box_coord_marginals_right) + + ordered_left_slopes_marginals = sort_left(slopes_marg_left) + ordered_right_slopes_marginals = sort_right(slopes_marg_right) + + return (ordered_left_marginals, + ordered_right_marginals, + ordered_left_marginals_textline, + ordered_right_marginals_textline, + ordered_left_marginals_bbox, + ordered_right_marginals_bbox, + ordered_left_slopes_marginals, + ordered_right_slopes_marginals) def run(self, overwrite: bool = False, @@ -4668,12 +4191,14 @@ class Eynollah: def run_single(self): t0 = time.time() - self.logger.info(f"Processing file: {self.writer.image_filename}") + self.logger.info(f"Processing file: {self.writer.image_filename}") self.logger.info("Step 1/5: Image Enhancement") - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = \ + self.run_enhancement(self.light_version) - self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns") + self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, " + f"{self.dpi} DPI, {num_col_classifier} columns") if is_image_enhanced: self.logger.info("Enhancement applied") @@ -4684,7 +4209,8 @@ class Eynollah: if self.extract_only_images: self.logger.info("Step 2/5: Image Extraction Mode") - text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ + text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, \ + image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], @@ -4701,9 +4227,8 @@ class Eynollah: self.logger.info("Step 2/5: Basic Processing Mode") self.logger.info("Skipping layout analysis and reading order detection") - _ ,_, _, textline_mask_tot_ea, img_bin_light, _ = \ - self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, - skip_layout_and_reading_order=self.skip_layout_and_reading_order) + _ ,_, _, _, textline_mask_tot_ea, img_bin_light, _ = \ + self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier,) page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \ 
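# [editor's note] Recap of the marginalia handling refactored above: split on center-x
# against the page midline, then order each side top-to-bottom on center-y. A compact
# sketch (inputs and name are illustrative, not the class API):
import numpy as np

def split_and_order(polys, cx, cy, mid):
    cx, cy = np.asarray(cx), np.asarray(cy)
    left, right = np.flatnonzero(cx < mid), np.flatnonzero(cx >= mid)
    left = left[np.argsort(cy[left])]        # top to bottom
    right = right[np.argsort(cy[right])]
    return [polys[i] for i in left], [polys[i] for i in right]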
self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) @@ -4714,51 +4239,41 @@ class Eynollah: all_found_textline_polygons = filter_contours_area_of_image( textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - M_main_tot = [cv2.moments(all_found_textline_polygons[j]) - for j in range(len(all_found_textline_polygons))] - w_h_textlines = [cv2.boundingRect(all_found_textline_polygons[j])[2:] for j in range(len(all_found_textline_polygons))] - w_h_textlines = [w_h_textlines[j][0] / float(w_h_textlines[j][1]) for j in range(len(w_h_textlines))] - cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - - all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted(all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines)#all_found_textline_polygons[::-1] + cx_main_tot, cy_main_tot = find_center_of_contours(all_found_textline_polygons) + w_h_textlines = [cv2.boundingRect(polygon)[2:] + for polygon in all_found_textline_polygons] + w_h_textlines = [w / float(h) for w, h in w_h_textlines] - all_found_textline_polygons=[ all_found_textline_polygons ] - - all_found_textline_polygons = self.dilate_textregions_contours_textline_version( - all_found_textline_polygons) + all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted( + #all_found_textline_polygons[::-1] + all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines) + all_found_textline_polygons = [ all_found_textline_polygons ] + all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline") - order_text_new = [0] slopes =[0] id_of_texts_tot =['region_0001'] - - polygons_of_images = [] - slopes_marginals_left = [] - slopes_marginals_right = [] - polygons_of_marginals_left = [] - polygons_of_marginals_right = [] - all_found_textline_polygons_marginals_left = [] - all_found_textline_polygons_marginals_right = [] - all_box_coord_marginals_left = [] - all_box_coord_marginals_right = [] - polygons_lines_xml = [] - contours_tables = [] conf_contours_textregions =[0] if self.ocr and not self.tr: gc.collect() - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, textline_light=True) + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons, np.zeros((len(all_found_textline_polygons), 4)), + self.models["ocr"], self.b_s_ocr, self.num_to_char, textline_light=True) else: ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order) + all_found_textline_polygons, page_coord, [], + [], [], 
[], [], [], [], + slopes, [], [], + cont_page, [], [], + ocr_all_textlines=ocr_all_textlines, + conf_contours_textregion=conf_contours_textregions, + skip_layout_reading_order=True) self.logger.info("Basic processing complete") return pcgts @@ -4768,7 +4283,8 @@ class Eynollah: if self.light_version: self.logger.info("Using light version processing") - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \ + text_regions_p_1 ,erosion_hurts, polygons_seplines, polygons_text_early, \ + textline_mask_tot_ea, img_bin_light, confidence_matrix = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) @@ -4780,22 +4296,22 @@ class Eynollah: img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + slope_deskew = self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, - num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + num_col_classifier, num_column_is_classified, + erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) + else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \ + text_regions_p_1, erosion_hurts, polygons_seplines, polygons_text_early = \ self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info(f"Textregion detection took {time.time() - t1:.1f}s") @@ -4804,14 +4320,15 @@ class Eynollah: t1 = time.time() num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, + erosion_hurts) self.logger.info(f"Graphics detection took {time.time() - t1:.1f}s") #self.logger.info('cont_page %s', cont_page) #plt.imshow(table_prediction) #plt.show() self.logger.info(f"Layout analysis complete ({time.time() - t1:.1f}s)") - if not num_col: + if not num_col and len(polygons_text_early) == 0: self.logger.info("No columns detected - generating empty PAGE-XML") pcgts = self.writer.build_pagexml_no_full_layout( @@ -4825,9 +4342,7 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(image_page) self.logger.info(f"Textline detection took {time.time() - t1:.1f}s") t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - if np.abs(slope_deskew) > 0.01: # Only log if there is significant skew - self.logger.info(f"Applied deskew correction: {slope_deskew:.2f} degrees") + 
slope_deskew = self.run_deskew(textline_mask_tot_ea) self.logger.info(f"Deskewing took {time.time() - t1:.1f}s") elif num_col_classifier in (1,2): org_h_l_m = textline_mask_tot_ea.shape[0] @@ -4845,10 +4360,25 @@ class Eynollah: text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - textline_mask_tot, text_regions_p, image_page_rotated = \ - self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, + textline_mask_tot, text_regions_p = \ + self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - + if self.plotter: + self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) + self.plotter.save_plot_of_layout_main(text_regions_p, image_page) + + if image_page.size: + # if ratio of text regions to page area is smaller than 30%, + # then deskew angle will not be allowed to exceed 45 + if (abs(slope_deskew) > 45 and + ((text_regions_p == 1).sum() + + (text_regions_p == 4).sum()) / float(image_page.size) <= 0.3): + slope_deskew = 0 + + # if there is no main text, then relabel marginalia as main + if (text_regions_p == 1).sum() == 0: + text_regions_p[text_regions_p == 4] = 1 + self.logger.info("Step 3/5: Text Line Detection") if self.curved_line: @@ -4863,24 +4393,25 @@ class Eynollah: textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info(f"Detection of marginals took {time.time() - t1:.1f}s") ## birdan sora chock chakir t1 = time.time() if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ + polygons_of_images, img_revised_tab, text_regions_p_1_n, \ + textline_mask_tot_d, regions_without_separators_d, \ boxes, boxes_d, polygons_of_marginals, contours_tables = \ self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) else: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ + polygons_of_images, img_revised_tab, text_regions_p_1_n, \ + textline_mask_tot_d, regions_without_separators_d, \ regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) if self.light_version: drop_label_in_full_layout = 4 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 @@ -4894,107 +4425,130 @@ class Eynollah: ###min_con_area = 0.000005 contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] +
if len(contours_only_text_parent) > 0: + areas_tot_text = np.prod(text_only.shape) areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + areas_cnt_text = areas_cnt_text / float(areas_tot_text) #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) - if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + contours_only_text_parent = np.array(contours_only_text_parent)[areas_cnt_text > MIN_AREA_REGION] + areas_cnt_text_parent = areas_cnt_text[areas_cnt_text > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + contours_only_text_parent = contours_only_text_parent[index_con_parents] + areas_cnt_text_parent = areas_cnt_text_parent[index_con_parents] - contours_only_text_parent = self.return_list_of_contours_with_desired_order( - contours_only_text_parent, index_con_parents) + centers = np.stack(find_center_of_contours(contours_only_text_parent)) # [2, N] - ##try: - ##contours_only_text_parent = \ - ##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = \ - ##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order( - areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + center0 = centers[:, -1:] # [2, 1] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + areas_tot_text_d = np.prod(text_only_d.shape) areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + areas_cnt_text_d = areas_cnt_text_d / float(areas_tot_text_d) - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + contours_only_text_parent_d = np.array(contours_only_text_parent_d)[areas_cnt_text_d > MIN_AREA_REGION] + areas_cnt_text_d = areas_cnt_text_d[areas_cnt_text_d > MIN_AREA_REGION] + + if len(contours_only_text_parent_d): index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order( - contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = \ - #list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = \ - #list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order( - areas_cnt_text_d, index_con_parents_d) + contours_only_text_parent_d = np.array(contours_only_text_parent_d)[index_con_parents_d] + areas_cnt_text_d = areas_cnt_text_d[index_con_parents_d] - cx_bigest_d_big, 
cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + - (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) - for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + - (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) - for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + centers_d = np.stack(find_center_of_contours(contours_only_text_parent_d)) # [2, N] - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(str(why)) + center0_d = centers_d[:, -1:].copy() # [2, 1] + # find the largest among the largest 5 deskewed contours + # that is also closest to the largest original contour + last5_centers_d = centers_d[:, -5:] + dists_d = np.linalg.norm(center0 - last5_centers_d, axis=0) + ind_largest = len(contours_only_text_parent_d) - last5_centers_d.shape[1] + np.argmin(dists_d) + center0_d[:, 0] = centers_d[:, ind_largest] + + # order new contours the same way as the undeskewed contours + # (by calculating the offset of the largest contours, respectively, + # of the new and undeskewed image; then for each contour, + # finding the closest new contour, with proximity calculated + # as distance of their centers modulo offset vector) (h, w) = text_only.shape[:2] center = (w // 2.0, h // 2.0) M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big + center0 = np.dot(M_22, center0) # [2, 1] + offset = center0 - center0_d # [2, 1] - contours_only_text_parent_d_ordered = [] + centers = np.dot(M_22, centers) - offset # [2,N] + # add dimension for area (so only contours of similar size will be considered close) + centers = np.append(centers, areas_cnt_text_parent[np.newaxis], axis=0) + centers_d = np.append(centers_d, areas_cnt_text_d[np.newaxis], axis=0) + + dists = np.zeros((len(contours_only_text_parent), len(contours_only_text_parent_d))) for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + - (p[1] - cy_biggest_d[j]) ** 2) - for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - #contours_only_text_parent = [] + dists[i] = np.linalg.norm(centers[:, i:i + 1] - centers_d, axis=0) + corresp = np.zeros(dists.shape, dtype=bool) + # keep searching 
next-closest until at least one correspondence on each side + while not np.all(corresp.sum(axis=1)) and not np.all(corresp.sum(axis=0)): + idx = np.nanargmin(dists) + i, j = np.unravel_index(idx, dists.shape) + dists[i, j] = np.nan + corresp[i, j] = True + #print("original/deskewed adjacency", corresp.nonzero()) + contours_only_text_parent_d_ordered = np.zeros_like(contours_only_text_parent) + contours_only_text_parent_d_ordered = contours_only_text_parent_d[np.argmax(corresp, axis=1)] + # img1 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) + # for i in range(len(contours_only_text_parent)): + # cv2.fillPoly(img1, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1) + # plt.subplot(2, 2, 1, title="direct corresp contours") + # plt.imshow(img1) + # img2 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) + # join deskewed regions mapping to single original ones + for i in range(len(contours_only_text_parent)): + if np.count_nonzero(corresp[i]) > 1: + indices = np.flatnonzero(corresp[i]) + #print("joining", indices) + polygons_d = [contour2polygon(contour) + for contour in contours_only_text_parent_d[indices]] + contour_d = polygon2contour(join_polygons(polygons_d)) + contours_only_text_parent_d_ordered[i] = contour_d + # cv2.fillPoly(img2, pts=[contour_d], color=i + 1) + # plt.subplot(2, 2, 3, title="joined contours") + # plt.imshow(img2) + # img3 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) + # split deskewed regions mapping to multiple original ones + def deskew(polygon): + polygon = shapely.affinity.rotate(polygon, -slope_deskew, origin=center) + polygon = shapely.affinity.translate(polygon, *offset.squeeze()) + return polygon + for j in range(len(contours_only_text_parent_d)): + if np.count_nonzero(corresp[:, j]) > 1: + indices = np.flatnonzero(corresp[:, j]) + #print("splitting along", indices) + polygons = [deskew(contour2polygon(contour)) + for contour in contours_only_text_parent[indices]] + polygon_d = contour2polygon(contours_only_text_parent_d[j]) + polygons_d = [make_intersection(polygon_d, polygon) + for polygon in polygons] + # ignore where there is no actual overlap + indices = indices[np.flatnonzero(polygons_d)] + contours_d = [polygon2contour(polygon_d) + for polygon_d in polygons_d + if polygon_d] + contours_only_text_parent_d_ordered[indices] = contours_d + # cv2.fillPoly(img3, pts=contours_d, color=j + 1) + # plt.subplot(2, 2, 4, title="split contours") + # plt.imshow(img3) + # img4 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) + # for i in range(len(contours_only_text_parent)): + # cv2.fillPoly(img4, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1) + # plt.subplot(2, 2, 2, title="result contours") + # plt.imshow(img4) + # plt.show() if not len(contours_only_text_parent): # stop early @@ -5003,123 +4557,116 @@ class Eynollah: pcgts = self.writer.build_pagexml_full_layout( [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], - polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [], - cont_page, polygons_lines_xml) + polygons_of_marginals, polygons_of_marginals, + empty_marginals, empty_marginals, + empty_marginals, empty_marginals, + [], [], [], [], + cont_page, polygons_seplines) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, - polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], - cont_page, 
polygons_lines_xml, contours_tables)
+                    polygons_of_marginals, polygons_of_marginals,
+                    empty_marginals, empty_marginals,
+                    empty_marginals, empty_marginals,
+                    [], [], [],
+                    cont_page, polygons_seplines, contours_tables)
             return pcgts
-        #print("text region early 3 in %.1fs", time.time() - t0)
         if self.light_version:
-            contours_only_text_parent = self.dilate_textregions_contours(
-                contours_only_text_parent)
+            contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
             contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one(
-                contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals)
+                contours_only_text_parent, contours_only_text_parent_d_ordered, text_only,
+                marginal_cnts=polygons_of_marginals)
             #print("text region early 3.5 in %.1fs", time.time() - t0)
-            txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
-                contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map)
-            #txt_con_org = self.dilate_textregions_contours(txt_con_org)
-            #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
+            conf_contours_textregions = get_textregion_contours_in_org_image_light(
+                contours_only_text_parent, self.image, confidence_matrix)
+            #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
         else:
-            txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
-                contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map)
+            conf_contours_textregions = get_textregion_contours_in_org_image_light(
+                contours_only_text_parent, self.image, confidence_matrix)
         #print("text region early 4 in %.1fs", time.time() - t0)
-        boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
-        boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
+        boxes_text = get_text_region_boxes_by_given_contours(contours_only_text_parent)
+        boxes_marginals = get_text_region_boxes_by_given_contours(polygons_of_marginals)
         #print("text region early 5 in %.1fs", time.time() - t0)
         ## from here on, it takes very long
         if not self.curved_line:
             if self.light_version:
                 if self.textline_light:
-                    all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
-                        all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2(
-                        txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org,
-                        image_page_rotated, boxes_text, slope_deskew)
-                    all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
-                        all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2(
-                        polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org,
-                        image_page_rotated, boxes_marginals, slope_deskew)
+                    all_found_textline_polygons, \
+                        all_box_coord, slopes = self.get_slopes_and_deskew_new_light2(
+                            contours_only_text_parent, textline_mask_tot_ea_org,
+                            boxes_text, slope_deskew)
+                    all_found_textline_polygons_marginals, \
+                        all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light2(
+                            polygons_of_marginals, textline_mask_tot_ea_org,
+                            boxes_marginals, slope_deskew)

-                    #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
-                    #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons,
-                    #        boxes_text, txt_con_org,
contours_only_text_parent, index_by_text_par_con) - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ - # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, - # boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons = dilate_textline_contours( all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons_marginals = dilate_textline_contours( all_found_textline_polygons_marginals) - contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ - index_by_text_par_con = self.filter_contours_without_textline_inside( - contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions) + contours_only_text_parent, all_found_textline_polygons, \ + contours_only_text_parent_d_ordered, conf_contours_textregions = \ + self.filter_contours_without_textline_inside( + contours_only_text_parent, all_found_textline_polygons, + contours_only_text_parent_d_ordered, conf_contours_textregions) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ - index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ - all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( + all_found_textline_polygons, \ + all_box_coord, slopes = self.get_slopes_and_deskew_new_light( + contours_only_text_parent, contours_only_text_parent, textline_mask_tot_ea, + boxes_text, slope_deskew) + all_found_textline_polygons_marginals, \ + all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ - all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ - all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( + all_found_textline_polygons, \ + all_box_coord, slopes = self.get_slopes_and_deskew_new( 
+ contours_only_text_parent, contours_only_text_parent, textline_mask_tot_ea, + boxes_text, slope_deskew) + all_found_textline_polygons_marginals, \ + all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ - all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, - image_page_rotated, boxes_text, text_only, + all_found_textline_polygons, \ + all_box_coord, slopes = self.get_slopes_and_deskew_new_curved( + contours_only_text_parent, textline_mask_tot_ea_erode, + boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ - all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( - polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, - image_page_rotated, boxes_marginals, text_only, + all_found_textline_polygons_marginals, \ + all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_curved( + polygons_of_marginals, textline_mask_tot_ea_erode, + boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) mid_point_of_page_width = text_regions_p.shape[1] / 2. 
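The reflowed call that follows partitions marginalia by the vertical page midpoint and orders each side from top to bottom. A minimal sketch of that split criterion (hypothetical helper; the project's `separate_marginals_to_left_and_right_and_order_from_top_to_down` additionally keeps the textline polygons, box coordinates, and slopes aligned with each side):

```python
import cv2

def split_marginals(polygons_of_marginals, mid_point_of_page_width):
    """Partition marginal contours into left/right of the page midpoint,
    each side ordered from top to bottom."""
    left, right = [], []
    for cnt in polygons_of_marginals:
        x, y, w, h = cv2.boundingRect(cnt)
        # a marginal belongs to the side its horizontal center falls on
        (left if x + w / 2. < mid_point_of_page_width else right).append((y, cnt))
    # sort each side by the top edge of the bounding box (reading order)
    return ([c for _, c in sorted(left, key=lambda t: t[0])],
            [c for _, c in sorted(right, key=lambda t: t[0])])
```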
- polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes_marginals_left, slopes_marginals_right = self.separate_marginals_to_left_and_right_and_order_from_top_to_down(polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width) + (polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes_marginals_left, slopes_marginals_right) = \ + self.separate_marginals_to_left_and_right_and_order_from_top_to_down( + polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, + slopes_marginals, mid_point_of_page_width) #print(len(polygons_of_marginals), len(ordered_left_marginals), len(ordered_right_marginals), 'marginals ordred') + if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( - contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: - #takes long timee - contours_only_text_parent_d_ordered = None if self.light_version: fun = check_any_text_region_in_model_one_is_main_or_header_light else: @@ -5128,40 +4675,38 @@ class Eynollah: all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \ conf_contours_textregions, conf_contours_textregions_h = fun( - text_regions_p, regions_fully, contours_only_text_parent, - all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered, conf_contours_textregions) + text_regions_p, regions_fully, contours_only_text_parent, + all_box_coord, all_found_textline_polygons, + slopes, contours_only_text_parent_d_ordered, conf_contours_textregions) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + label_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region(text_regions_p, label_img, + min_area=0.00003) ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline( ##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, ##kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) if not self.reading_order_machine_based: - pixel_seps = 6 + label_seps = 6 if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h) + text_regions_p, num_col_classifier, self.tables, label_seps, 
contours_only_text_parent_h) else: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h_d_ordered) + text_regions_p_1_n, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered) elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps) + text_regions_p, num_col_classifier, self.tables, label_seps) else: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps) + text_regions_p_1_n, num_col_classifier, self.tables, label_seps) if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -5174,91 +4719,21 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, - num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col_classifier, erosion_hurts, self.tables, self.right2left, + logger=self.logger) else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, - num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col_classifier, erosion_hurts, self.tables, self.right2left, + logger=self.logger) + else: + contours_only_text_parent_h = [] + contours_only_text_parent_h_d_ordered = [] if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() - if self.full_layout: - self.logger.info("Step 4/5: Reading Order Detection") - - if self.reading_order_machine_based: - self.logger.info("Using machine-based detection") - if self.right2left: - self.logger.info("Right-to-left mode enabled") - if self.headers_off: - self.logger.info("Headers ignored in reading order") - - if self.reading_order_machine_based: - tror = time.time() - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( - contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s") - - if self.ocr and not self.tr: - self.logger.info("Step 4.5/5: OCR Processing") - - if torch.cuda.is_available(): - self.logger.info("Using GPU acceleration") - else: - self.logger.info("Using CPU processing") - - gc.collect() - if len(all_found_textline_polygons)>0: - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines = None - - if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: - 
ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines_marginals_left = None - - if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: - ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines_marginals_right = None - - if all_found_textline_polygons_h and len(all_found_textline_polygons)>0: - ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines_h = None - - if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0: - ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines_drop = None - else: - ocr_all_textlines = None - ocr_all_textlines_marginals_left = None - ocr_all_textlines_marginals_right = None - ocr_all_textlines_h = None - ocr_all_textlines_drop = None - - self.logger.info("Step 5/5: Output Generation") - - pcgts = self.writer.build_pagexml_full_layout( - contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, - polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right, - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) - - return pcgts - - contours_only_text_parent_h = None self.logger.info("Step 4/5: Reading Order Detection") if self.reading_order_machine_based: @@ -5267,7 +4742,7 @@ class Eynollah: self.logger.info("Right-to-left mode enabled") if self.headers_off: self.logger.info("Headers ignored in reading order") - + if self.reading_order_machine_based: order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( contours_only_text_parent, contours_only_text_parent_h, text_regions_p) @@ -5276,108 +4751,128 @@ class Eynollah: order_text_new, id_of_texts_tot = self.do_order_of_regions( contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( - contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent_d_ordered, 
contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - - if self.ocr and self.tr: - self.logger.info("Step 4.5/5: OCR Processing") - - if torch.cuda.is_available(): - self.logger.info("Using GPU acceleration") - else: - self.logger.info("Using CPU processing") - - if self.light_version: - self.logger.info("Using light version OCR") - - if self.textline_light: - self.logger.info("Using light text line detection for OCR") - - self.logger.info("Processing text lines...") - - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - ocr_textline_in_textregion.append(text_ocr) - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - - elif self.ocr and not self.tr: - gc.collect() - if len(all_found_textline_polygons)>0: - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - - if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: - ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - - if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: - ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - - else: - ocr_all_textlines = None - ocr_all_textlines_marginals_left = None - ocr_all_textlines_marginals_right = None + contours_only_text_parent_d_ordered, 
contours_only_text_parent_h_d_ordered, + boxes_d, textline_mask_tot_d) self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s") - self.logger.info("Step 5/5: Output Generation") - self.logger.info("Generating PAGE-XML output") + ocr_all_textlines = None + ocr_all_textlines_marginals_left = None + ocr_all_textlines_marginals_right = None + ocr_all_textlines_h = None + ocr_all_textlines_drop = None + if self.ocr: + self.logger.info("Step 4.5/5: OCR Processing") - pcgts = self.writer.build_pagexml_no_full_layout( - txt_con_org, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions) - - self.logger.info(f"Output file: {self.writer.output_filename}") - + if not self.tr: + gc.collect() + + if len(all_found_textline_polygons): + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons, all_box_coord, + self.models["ocr"], self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if len(all_found_textline_polygons_marginals_left): + ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons_marginals_left, all_box_coord_marginals_left, + self.models["ocr"], self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if len(all_found_textline_polygons_marginals_right): + ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons_marginals_right, all_box_coord_marginals_right, + self.models["ocr"], self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if self.full_layout and len(all_found_textline_polygons): + ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons_h, all_box_coord_h, + self.models["ocr"], self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if self.full_layout and len(polygons_of_drop_capitals): + ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines( + image_page, polygons_of_drop_capitals, np.zeros((len(polygons_of_drop_capitals), 4)), + self.models["ocr"], self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + else: + if self.light_version: + self.logger.info("Using light version OCR") + if self.textline_light: + self.logger.info("Using light text line detection for OCR") + self.logger.info("Processing text lines...") + + gc.collect() + + torch.cuda.empty_cache() + self.models["ocr"].to(self.device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + ocr_all_textlines = [] + # FIXME: what about lines in marginals / headings / drop-capitals here? 
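In the TrOCR branch below, every detected text line is isolated by filling its polygon into a mask, whitening everything outside it, and cropping the bounding box before recognition. A condensed sketch of that preparation step (assuming a BGR page image and an OpenCV contour; the actual loop additionally shifts coordinates by the region box and optionally dilates the mask):

```python
import cv2
import numpy as np

def crop_textline_for_ocr(image_page, ind_poly):
    """Mask out everything but one text line polygon and crop it."""
    ind_poly = np.maximum(ind_poly, 0)            # clip negative coordinates
    x, y, w, h = cv2.boundingRect(ind_poly)
    mask_poly = np.zeros(image_page.shape[:2], dtype=np.uint8)
    cv2.fillPoly(mask_poly, pts=[ind_poly], color=1)
    img_poly_on_img = image_page.copy()
    img_poly_on_img[mask_poly == 0] = 255         # white out the surroundings
    return img_poly_on_img[y:y+h, x:x+w]
```

The cropped line image is then handed to `return_ocr_of_textline_without_common_section` for TrOCR decoding, as the loop below does.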
+ for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section( + img_croped, self.models["ocr"], self.processor, self.device, w, h2w_ratio, ind_tot) + ocr_textline_in_textregion.append(text_ocr) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) + + self.logger.info("Step 5/5: Output Generation") + + if self.full_layout: + pcgts = self.writer.build_pagexml_full_layout( + contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, + polygons_of_images, contours_tables, polygons_of_drop_capitals, + polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h, + ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, + ocr_all_textlines_drop, + conf_contours_textregions, conf_contours_textregions_h) + else: + pcgts = self.writer.build_pagexml_no_full_layout( + contours_only_text_parent, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_box_coord, polygons_of_images, + polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, contours_tables, + ocr_all_textlines=ocr_all_textlines, + ocr_all_textlines_marginals_left=ocr_all_textlines_marginals_left, + ocr_all_textlines_marginals_right=ocr_all_textlines_marginals_right, + conf_contours_textregions=conf_contours_textregions) + return pcgts @@ -5395,7 +4890,6 @@ class Eynollah_ocr: min_conf_value_of_textline_text : Optional[float]=None, logger=None, ): - self.dir_models = dir_models self.model_name = model_name self.tr_ocr = tr_ocr self.export_textline_images_and_text = export_textline_images_and_text @@ -5426,7 +4920,7 @@ class Eynollah_ocr: if self.model_name: self.model_ocr_dir = self.model_name else: - 
self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250904" + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250930" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5518,7 +5012,9 @@ class Eynollah_ocr: if child_textlines.tag.endswith("Coords"): cropped_lines_region_indexer.append(indexer_text_region) p_h=child_textlines.attrib['points'].split(' ') - textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) + textline_coords = np.array( [ [int(x.split(',')[0]), + int(x.split(',')[1]) ] + for x in p_h] ) x,y,w,h = cv2.boundingRect(textline_coords) if dir_out_image_text: @@ -5534,9 +5030,12 @@ class Eynollah_ocr: img_crop = img_poly_on_img[y:y+h, x:x+w, :] img_crop[mask_poly==0] = 255 - self.logger.debug("processing %d lines for '%s'", len(cropped_lines), nn.attrib['id']) + self.logger.debug("processing %d lines for '%s'", + len(cropped_lines), nn.attrib['id']) if h2w_ratio > 0.1: - cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width) ) + cropped_lines.append(resize_image(img_crop, + tr_ocr_input_height_and_width, + tr_ocr_input_height_and_width) ) cropped_lines_meging_indexing.append(0) indexer_b_s+=1 if indexer_b_s==self.b_s: @@ -5545,8 +5044,10 @@ class Eynollah_ocr: indexer_b_s = 0 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + generated_ids_merged = self.model_ocr.generate( + pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode( + generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged @@ -5554,7 +5055,9 @@ class Eynollah_ocr: splited_images, _ = return_textlines_split_if_needed(img_crop, None) #print(splited_images) if splited_images: - cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) + cropped_lines.append(resize_image(splited_images[0], + tr_ocr_input_height_and_width, + tr_ocr_input_height_and_width)) cropped_lines_meging_indexing.append(1) indexer_b_s+=1 @@ -5564,13 +5067,17 @@ class Eynollah_ocr: indexer_b_s = 0 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + generated_ids_merged = self.model_ocr.generate( + pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode( + generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged - cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) + cropped_lines.append(resize_image(splited_images[1], + tr_ocr_input_height_and_width, + tr_ocr_input_height_and_width)) cropped_lines_meging_indexing.append(-1) indexer_b_s+=1 @@ -5580,8 +5087,10 @@ class Eynollah_ocr: indexer_b_s = 0 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + 
generated_ids_merged = self.model_ocr.generate( + pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode( + generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged @@ -5596,8 +5105,10 @@ class Eynollah_ocr: indexer_b_s = 0 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + generated_ids_merged = self.model_ocr.generate( + pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode( + generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged @@ -5628,15 +5139,22 @@ class Eynollah_ocr: ####n_end = (i+1)*self.b_s ####imgs = cropped_lines[n_start:n_end] ####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - ####generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - ####generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + ####generated_ids_merged = self.model_ocr.generate( + #### pixel_values_merged.to(self.device)) + ####generated_text_merged = self.processor.batch_decode( + #### generated_ids_merged, skip_special_tokens=True) ####extracted_texts = extracted_texts + generated_text_merged del cropped_lines gc.collect() - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] + if cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] #print(extracted_texts_merged, len(extracted_texts_merged)) @@ -5658,7 +5176,8 @@ class Eynollah_ocr: w_bb = bb_ind[2] h_bb = bb_ind[3] - font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) ) + font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], + font.path, w_bb, int(h_bb*0.4) ) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) @@ -5676,25 +5195,27 @@ class Eynollah_ocr: #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer') #######text_by_textregion = [] #######for ind in unique_cropped_lines_region_indexer: - #######extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] - + #######ind = np.array(cropped_lines_region_indexer)==ind + #######extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] #######text_by_textregion.append(" ".join(extracted_texts_merged_un)) text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + ind = np.array(cropped_lines_region_indexer) == ind + extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] if len(extracted_texts_merged_un)>1: text_by_textregion_ind = "" next_glue = "" for indt in range(len(extracted_texts_merged_un)): - if 
extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-') or extracted_texts_merged_un[indt].endswith('¬'): - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt][:-1] + if (extracted_texts_merged_un[indt].endswith('⸗') or + extracted_texts_merged_un[indt].endswith('-') or + extracted_texts_merged_un[indt].endswith('¬')): + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1] next_glue = "" else: - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt] + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt] next_glue = " " text_by_textregion.append(text_by_textregion_ind) - else: text_by_textregion.append(" ".join(extracted_texts_merged_un)) @@ -5752,7 +5273,9 @@ class Eynollah_ocr: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order] + ###sample_order = [(id_to_order[tid], text) + ### for tid, text in zip(id_textregions, textregions_by_existing_ids) + ### if tid in id_to_order] ##ordered_texts_sample = [text for _, text in sorted(sample_order)] ##tot_page_text = ' '.join(ordered_texts_sample) @@ -5763,7 +5286,7 @@ class Eynollah_ocr: ##unicode_textpage.text = tot_page_text ET.register_namespace("",name_space) - tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) + tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None) else: ###max_len = 280#512#280#512 ###padding_token = 1500#299#1500#299 @@ -5826,7 +5349,9 @@ class Eynollah_ocr: if child_textlines.tag.endswith("Coords"): cropped_lines_region_indexer.append(indexer_text_region) p_h=child_textlines.attrib['points'].split(' ') - textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) + textline_coords = np.array( [ [int(x.split(',')[0]), + int(x.split(',')[1]) ] + for x in p_h] ) x,y,w,h = cv2.boundingRect(textline_coords) @@ -5858,17 +5383,19 @@ class Eynollah_ocr: img_crop[mask_poly==0] = 255 else: - #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') + # print(file_name, angle_degrees, w*h, + # mask_poly[:,:,0].sum(), + # mask_poly[:,:,0].sum() /float(w*h) , + # 'didi') if angle_degrees > 3: better_des_slope = get_orientation_moments(textline_coords) - img_crop = rotate_image_with_padding(img_crop, better_des_slope ) - + img_crop = rotate_image_with_padding(img_crop, better_des_slope) if dir_in_bin is not None: - img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) + img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope) - mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) + mask_poly = rotate_image_with_padding(mask_poly, better_des_slope) mask_poly = mask_poly.astype('uint8') #new bounding box @@ -5879,7 +5406,6 @@ class Eynollah_ocr: if not self.do_not_mask_with_textline_contour: img_crop[mask_poly==0] = 255 - if dir_in_bin is not None: img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] if not self.do_not_mask_with_textline_contour: @@ -5887,11 +5413,14 @@ class Eynollah_ocr: if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: if dir_in_bin is not None: - img_crop, img_crop_bin = 
break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + img_crop, img_crop_bin = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly, img_crop_bin) else: - img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + img_crop, _ = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly) - else: better_des_slope = 0 if not self.do_not_mask_with_textline_contour: @@ -5904,13 +5433,18 @@ class Eynollah_ocr: else: if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: if dir_in_bin is not None: - img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + img_crop, img_crop_bin = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly, img_crop_bin) else: - img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + img_crop, _ = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly) if not self.export_textline_images_and_text: if w_scaled < 750:#1.5*image_width: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop, image_height, image_width) cropped_lines.append(img_fin) if abs(better_des_slope) > 45: cropped_lines_ver_index.append(1) @@ -5919,13 +5453,15 @@ class Eynollah_ocr: cropped_lines_meging_indexing.append(0) if dir_in_bin is not None: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) else: splited_images, splited_images_bin = return_textlines_split_if_needed( img_crop, img_crop_bin if dir_in_bin is not None else None) if splited_images: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images[0], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) @@ -5934,7 +5470,8 @@ class Eynollah_ocr: else: cropped_lines_ver_index.append(0) - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images[1], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) @@ -5945,13 +5482,16 @@ class Eynollah_ocr: cropped_lines_ver_index.append(0) if dir_in_bin is not None: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images_bin[0], image_height, image_width) cropped_lines_bin.append(img_fin) - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images_bin[1], image_height, image_width) cropped_lines_bin.append(img_fin) else: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop, image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) @@ -5961,7 +5501,8 @@ class Eynollah_ocr: cropped_lines_ver_index.append(0) if dir_in_bin is not None: - img_fin = 
preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) if self.export_textline_images_and_text: @@ -5973,7 +5514,8 @@ class Eynollah_ocr: if cheild_text.tag.endswith("Unicode"): textline_text = cheild_text.text if textline_text: - base_name = os.path.join(dir_out, file_name + '_line_' + str(indexer_textlines)) + base_name = os.path.join( + dir_out, file_name + '_line_' + str(indexer_textlines)) if self.pref_of_dataset: base_name += '_' + self.pref_of_dataset if not self.do_not_mask_with_textline_contour: @@ -6063,25 +5605,31 @@ class Eynollah_ocr: preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character - masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped = \ + np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) masked_means[np.isnan(masked_means)] = 0 masked_means_ver = masked_means[indices_ver] #print(masked_means_ver, 'pred_max_not_unk') - indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + indices_where_flipped_conf_value_is_higher = \ + np.where(masked_means_flipped > masked_means_ver)[0] #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') if len(indices_where_flipped_conf_value_is_higher)>0: indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] - preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] + preds[indices_to_be_replaced,:,:] = \ + preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] if dir_in_bin is not None: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) @@ -6090,35 +5638,42 @@ class Eynollah_ocr: preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character - masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped = \ + np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) masked_means[np.isnan(masked_means)] 
= 0 masked_means_ver = masked_means[indices_ver] #print(masked_means_ver, 'pred_max_not_unk') - indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + indices_where_flipped_conf_value_is_higher = \ + np.where(masked_means_flipped > masked_means_ver)[0] #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') if len(indices_where_flipped_conf_value_is_higher)>0: indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] - preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] + preds_bin[indices_to_be_replaced,:,:] = \ + preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] preds = (preds + preds_bin) / 2. - pred_texts = decode_batch_predictions(preds, self.num_to_char) preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].replace("[UNK]", "") @@ -6133,31 +5688,40 @@ class Eynollah_ocr: del cropped_lines_bin gc.collect() - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] + if cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] - extracted_conf_value_merged = [extracted_conf_value[ind] if cropped_lines_meging_indexing[ind]==0 else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2. if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_conf_value_merged = [extracted_conf_value[ind] + if cropped_lines_meging_indexing[ind]==0 + else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2. + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] - extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm] for ind_cfm in range(len(extracted_texts_merged)) if extracted_texts_merged[ind_cfm] is not None] + extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm] + for ind_cfm in range(len(extracted_texts_merged)) + if extracted_texts_merged[ind_cfm] is not None] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - if dir_out_image_text: - #font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! 
font = importlib_resources.files(__package__) / "Charis-Regular.ttf" with importlib_resources.as_file(font) as font: font = ImageFont.truetype(font=font, size=40) for indexer_text, bb_ind in enumerate(total_bb_coordinates): - - x_bb = bb_ind[0] y_bb = bb_ind[1] w_bb = bb_ind[2] h_bb = bb_ind[3] - font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) ) + font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], + font.path, w_bb, int(h_bb*0.4) ) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) @@ -6174,24 +5738,25 @@ class Eynollah_ocr: text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + ind = np.array(cropped_lines_region_indexer)==ind + extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] if len(extracted_texts_merged_un)>1: text_by_textregion_ind = "" next_glue = "" for indt in range(len(extracted_texts_merged_un)): - if extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-') or extracted_texts_merged_un[indt].endswith('¬'): - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt][:-1] + if (extracted_texts_merged_un[indt].endswith('⸗') or + extracted_texts_merged_un[indt].endswith('-') or + extracted_texts_merged_un[indt].endswith('¬')): + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1] next_glue = "" else: - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt] + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt] next_glue = " " text_by_textregion.append(text_by_textregion_ind) - else: text_by_textregion.append(" ".join(extracted_texts_merged_un)) #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion') - - + ###index_tot_regions = [] ###tot_region_ref = [] @@ -6240,7 +5805,8 @@ class Eynollah_ocr: if childtest3.tag.endswith("TextEquiv"): for child_uc in childtest3: if child_uc.tag.endswith("Unicode"): - childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}") + childtest3.set('conf', + f"{extracted_conf_value_merged[indexer]:.2f}") child_uc.text = extracted_texts_merged[indexer] indexer = indexer + 1 @@ -6256,7 +5822,9 @@ class Eynollah_ocr: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order] + ###sample_order = [(id_to_order[tid], text) + ### for tid, text in zip(id_textregions, textregions_by_existing_ids) + ### if tid in id_to_order] ##ordered_texts_sample = [text for _, text in sorted(sample_order)] ##tot_page_text = ' '.join(ordered_texts_sample) @@ -6267,5 +5835,5 @@ class Eynollah_ocr: ##unicode_textpage.text = tot_page_text ET.register_namespace("",name_space) - tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) + tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None) #print("Job done in %.1fs", time.time() - t0) diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py index 89dde16..9247efe 100644 --- a/src/eynollah/image_enhancer.py +++ b/src/eynollah/image_enhancer.py @@ -6,23 +6,23 @@ from logging import 
Logger import os import time from typing import Optional -import atexit -from functools import partial from pathlib import Path -from multiprocessing import cpu_count import gc + import cv2 import numpy as np from ocrd_utils import getLogger, tf_disable_interactive_logs import tensorflow as tf from skimage.morphology import skeletonize from tensorflow.keras.models import load_model + from .utils.resize import resize_image from .utils.pil_cv2 import pil2cv from .utils import ( is_image_filename, crop_image_inside_box ) +from .eynollah import PatchEncoder, Patches DPI_THRESHOLD = 298 KERNEL = np.ones((5, 5), np.uint8) diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index 45db8e4..1b991ae 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -6,25 +6,24 @@ from logging import Logger import os import time from typing import Optional -import atexit -from functools import partial from pathlib import Path -from multiprocessing import cpu_count import xml.etree.ElementTree as ET + import cv2 import numpy as np from ocrd_utils import getLogger import statistics import tensorflow as tf from tensorflow.keras.models import load_model -from .utils.resize import resize_image +from .utils.resize import resize_image from .utils.contour import ( find_new_features_of_contours, return_contours_of_image, return_parent_contours, ) from .utils import is_xml_filename +from .eynollah import PatchEncoder, Patches DPI_THRESHOLD = 298 KERNEL = np.ones((5, 5), np.uint8) @@ -806,7 +805,7 @@ class machine_based_reading_order_on_layout: tree_xml.write(os.path.join(dir_out, file_name+'.xml'), xml_declaration=True, method='xml', - encoding="utf8", + encoding="utf-8", default_namespace=None) #sys.exit() diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json index 5d89c92..f9c6f4d 100644 --- a/src/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -1,5 +1,5 @@ { - "version": "0.5.0", + "version": "0.6.0rc2", "git_url": "https://github.com/qurator-spk/eynollah", "dockerhub": "ocrd/eynollah", "tools": { diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index de083f5..5ccb2af 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1,3 +1,5 @@ +from typing import Tuple +from logging import getLogger import time import math @@ -13,10 +15,21 @@ from scipy.ndimage import gaussian_filter1d from .is_nan import isNaN from .contour import (contours_in_same_horizon, + find_center_of_contours, find_new_features_of_contours, return_contours_of_image, return_parent_contours) +def pairwise(iterable): + # pairwise('ABCDEFG') → AB BC CD DE EF FG + + iterator = iter(iterable) + a = next(iterator, None) + + for b in iterator: + yield a, b + a = b + def return_x_start_end_mothers_childs_and_type_of_reading_order( x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): @@ -138,7 +151,8 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( min_ys=np.min(y_sep) max_ys=np.max(y_sep) - y_mains= [min_ys] + y_mains=[] + y_mains.append(min_ys) y_mains_sep_ohne_grenzen=[] for ii in range(len(new_main_sep_y)): @@ -297,9 +311,17 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( x_end_with_child_without_mother, new_main_sep_y) +def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]: + return (box[1], box[1] + box[3], + box[0], box[0] + box[2]) + +def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]: + return (slice(box[1], box[1] + 
box[3]), + slice(box[0], box[0] + box[2])) + def crop_image_inside_box(box, img_org_copy): - image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] - return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] + image_box = img_org_copy[box2slice(box)] + return image_box, box2rect(box) def otsu_copy_binary(img): img_r = np.zeros((img.shape[0], img.shape[1], 3)) @@ -372,6 +394,10 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8): return np.std(z) def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8): + if not regions_without_separators.any(): + return 0, [] + #plt.imshow(regions_without_separators) + #plt.show() regions_without_separators_0 = regions_without_separators.sum(axis=0) ##plt.plot(regions_without_separators_0) ##plt.show() @@ -391,6 +417,9 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl zneg = gaussian_filter1d(zneg, sigma_) peaks_neg, _ = find_peaks(zneg, height=0) + #plt.plot(zneg) + #plt.plot(peaks_neg, zneg[peaks_neg], 'rx') + #plt.show() peaks, _ = find_peaks(z, height=0) peaks_neg = peaks_neg - 10 - 10 @@ -405,9 +434,13 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl (peaks_neg < (regions_without_separators.shape[1] - 370))] interest_pos = z[peaks] interest_pos = interest_pos[interest_pos > 10] + if not interest_pos.any(): + return 0, [] # plt.plot(z) # plt.show() interest_neg = z[peaks_neg] + if not interest_neg.any(): + return 0, [] min_peaks_pos = np.min(interest_pos) max_peaks_pos = np.max(interest_pos) @@ -492,7 +525,8 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl # print(forest[np.argmin(z[forest]) ] ) if not isNaN(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) - forest = [peaks_neg_fin[i + 1]] + forest = [] + forest.append(peaks_neg_fin[i + 1]) if i == (len(peaks_neg_fin) - 1): # print(print(forest[np.argmin(z[forest]) ] )) if not isNaN(forest[np.argmin(z[forest])]): @@ -660,7 +694,8 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): # print(forest[np.argmin(z[forest]) ] ) if not isNaN(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) - forest = [peaks_neg_fin[i + 1]] + forest = [] + forest.append(peaks_neg_fin[i + 1]) if i == (len(peaks_neg_fin) - 1): # print(print(forest[np.argmin(z[forest]) ] )) if not isNaN(forest[np.argmin(z[forest])]): @@ -761,7 +796,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): return len(peaks_fin_true), peaks_fin_true def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): - regions_without_separators_0 = regions_without_separators[:, :, 0].sum(axis=0) + regions_without_separators_0 = regions_without_separators.sum(axis=0) ##plt.plot(regions_without_separators_0) ##plt.show() @@ -788,7 +823,10 @@ def return_regions_without_separators(regions_pre): return regions_without_separators def put_drop_out_from_only_drop_model(layout_no_patch, layout1): - drop_only = (layout_no_patch[:, :, 0] == 4) * 1 + if layout_no_patch.ndim == 3: + layout_no_patch = layout_no_patch[:, :, 0] + + drop_only = (layout_no_patch[:, :] == 4) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -814,9 +852,8 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): (map_of_drop_contour_bb == 5).sum()) >= 15: 
contours_drop_parent_final.append(contours_drop_parent[jj]) - layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0 - - layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4)) + layout_no_patch[:, :][layout_no_patch[:, :] == 4] = 0 + layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=4) return layout_no_patch @@ -890,29 +927,28 @@ def check_any_text_region_in_model_one_is_main_or_header( contours_only_text_parent_main_d=[] contours_only_text_parent_head_d=[] - for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] - img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) - img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) + for ii, con in enumerate(contours_only_text_parent): + img = np.zeros(regions_model_1.shape[:2]) + img = cv2.fillPoly(img, pts=[con], color=255) - all_pixels=((img[:,:,0]==255)*1).sum() - pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() + all_pixels=((img == 255)*1).sum() + pixels_header=( ( (img == 255) & (regions_model_full[:,:,0]==2) )*1 ).sum() pixels_main=all_pixels-pixels_header if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ]=2 contours_only_text_parent_head.append(con) - if contours_only_text_parent_d_ordered is not None: + if len(contours_only_text_parent_d_ordered): contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_head.append(all_box_coord[ii]) slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) conf_contours_head.append(None) else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ]=1 contours_only_text_parent_main.append(con) conf_contours_main.append(conf_contours[ii]) - if contours_only_text_parent_d_ordered is not None: + if len(contours_only_text_parent_d_ordered): contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) @@ -952,11 +988,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light( regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom, regions_model_full.shape[0] // zoom), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent] + contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent] ### cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ - find_new_features_of_contours(contours_only_text_parent) + find_new_features_of_contours(contours_only_text_parent_z) length_con=x_max_main-x_min_main height_con=y_max_main-y_min_main @@ -979,35 +1015,39 @@ def check_any_text_region_in_model_one_is_main_or_header_light( contours_only_text_parent_main_d=[] contours_only_text_parent_head_d=[] - for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] - img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) - img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) + for ii, con in enumerate(contours_only_text_parent_z): + img = np.zeros(regions_model_1.shape[:2]) + img = cv2.fillPoly(img, pts=[con], color=255) 
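To illustrate the box2rect/box2slice helpers added in utils above: they convert an OpenCV-style (x, y, w, h) bounding box into the (y_min, y_max, x_min, x_max) rectangle and the NumPy slice pair used by crop_image_inside_box, so cropping becomes a plain indexing expression. A small self-contained check (the toy image and box are made up):

import numpy as np

def box2rect(box):
    # (x, y, w, h) -> (y_min, y_max, x_min, x_max)
    return (box[1], box[1] + box[3], box[0], box[0] + box[2])

def box2slice(box):
    # (x, y, w, h) -> (row slice, column slice) for NumPy indexing
    return (slice(box[1], box[1] + box[3]), slice(box[0], box[0] + box[2]))

img = np.arange(30).reshape(5, 6)
box = (1, 2, 3, 2)   # x=1, y=2, w=3, h=2
assert np.array_equal(img[box2slice(box)], img[2:4, 1:4])
assert box2rect(box) == (2, 4, 1, 4)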
- all_pixels = (img[:,:,0]==255).sum() - pixels_header=((img[:,:,0]==255) & + all_pixels = (img == 255).sum() + pixels_header=((img == 255) & (regions_model_full[:,:,0]==2)).sum() pixels_main = all_pixels - pixels_header - if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 - contours_only_text_parent_head.append(con) - if contours_only_text_parent_d_ordered is not None: + if (( pixels_header / float(pixels_main) >= 0.6 and + length_con[ii] / float(height_con[ii]) >= 1.3 and + length_con[ii] / float(height_con[ii]) <= 3 ) or + ( pixels_header / float(pixels_main) >= 0.3 and + length_con[ii] / float(height_con[ii]) >=3 )): + + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 2 + contours_only_text_parent_head.append(contours_only_text_parent[ii]) + conf_contours_head.append(None) # why not conf_contours[ii], too? + if len(contours_only_text_parent_d_ordered): contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_head.append(all_box_coord[ii]) slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) - conf_contours_head.append(None) + else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 - contours_only_text_parent_main.append(con) + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 1 + contours_only_text_parent_main.append(contours_only_text_parent[ii]) conf_contours_main.append(conf_contours[ii]) - if contours_only_text_parent_d_ordered is not None: + if len(contours_only_text_parent_d_ordered): contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) - #print(all_pixels,pixels_main,pixels_header) ### to make it faster @@ -1015,8 +1055,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light( # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom, # regions_model_full.shape[0] // zoom), # interpolation=cv2.INTER_NEAREST) - contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head] - contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main] ### return (regions_model_1, @@ -1082,11 +1120,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) textlines_big.append(textlines_tot[i]) textlines_big_org_form.append(textlines_tot_org_form[i]) - img_textline_s = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) - img_textline_s = cv2.fillPoly(img_textline_s, pts=textlines_small, color=(1, 1, 1)) + img_textline_s = np.zeros(textline_iamge.shape[:2]) + img_textline_s = cv2.fillPoly(img_textline_s, pts=textlines_small, color=1) - img_textline_b = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) - img_textline_b = cv2.fillPoly(img_textline_b, pts=textlines_big, color=(1, 1, 1)) + img_textline_b = np.zeros(textline_iamge.shape[:2]) + img_textline_b = cv2.fillPoly(img_textline_b, pts=textlines_big, color=1) sum_small_big_all = img_textline_s + img_textline_b sum_small_big_all2 = (sum_small_big_all[:, :] == 2) * 1 @@ -1098,11 +1136,11 @@ def 
small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) # print(len(textlines_small),'small') intersections = [] for z2 in range(len(textlines_big)): - img_text = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) - img_text = cv2.fillPoly(img_text, pts=[textlines_small[z1]], color=(1, 1, 1)) + img_text = np.zeros(textline_iamge.shape[:2]) + img_text = cv2.fillPoly(img_text, pts=[textlines_small[z1]], color=1) - img_text2 = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) - img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z2]], color=(1, 1, 1)) + img_text2 = np.zeros(textline_iamge.shape[:2]) + img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z2]], color=1) sum_small_big = img_text2 + img_text sum_small_big_2 = (sum_small_big[:, :] == 2) * 1 @@ -1128,19 +1166,17 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) index_small_textlines = list(np.where(np.array(dis_small_from_bigs_tot) == z)[0]) # print(z,index_small_textlines) - img_text2 = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1], 3)) - img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z]], color=(255, 255, 255)) + img_text2 = np.zeros(textline_iamge.shape[:2], dtype=np.uint8) + img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z]], color=255) textlines_big_with_change.append(z) for k in index_small_textlines: - img_text2 = cv2.fillPoly(img_text2, pts=[textlines_small[k]], color=(255, 255, 255)) + img_text2 = cv2.fillPoly(img_text2, pts=[textlines_small[k]], color=255) textlines_small_with_change.append(k) - img_text2 = img_text2.astype(np.uint8) - imgray = cv2.cvtColor(img_text2, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - cont, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(img_text2, 0, 255, 0) + cont, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(cont[0],type(cont)) textlines_big_with_change_con.append(cont) @@ -1152,110 +1188,51 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) # print(textlines_big_with_change,'textlines_big_with_change') # print(textlines_small_with_change,'textlines_small_with_change') # print(textlines_big) - textlines_con_changed.append(textlines_big_org_form) - else: - textlines_con_changed.append(textlines_big_org_form) + + textlines_con_changed.append(textlines_big_org_form) return textlines_con_changed -def order_of_regions(textline_mask, contours_main, contours_header, y_ref): +def order_of_regions(textline_mask, contours_main, contours_head, y_ref): ##plt.imshow(textline_mask) ##plt.show() - """ - print(len(contours_main),'contours_main') - mada_n=textline_mask.sum(axis=1) - y=mada_n[:] - - y_help=np.zeros(len(y)+40) - y_help[20:len(y)+20]=y - x=np.arange(len(y)) - - peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - ##plt.imshow(textline_mask[:,:]) - ##plt.show() - - sigma_gaus=8 - z= gaussian_filter1d(y_help, sigma_gaus) - zneg_rev=-y_help+np.max(y_help) - zneg=np.zeros(len(zneg_rev)+40) - zneg[20:len(zneg_rev)+20]=zneg_rev - zneg= gaussian_filter1d(zneg, sigma_gaus) - - peaks, _ = find_peaks(z, height=0) - peaks_neg, _ = find_peaks(zneg, height=0) - peaks_neg=peaks_neg-20-20 - peaks=peaks-20 - """ - textline_sum_along_width = textline_mask.sum(axis=1) - - y = textline_sum_along_width[:] + y = textline_mask.sum(axis=1) # horizontal projection profile y_padded = np.zeros(len(y) + 40) y_padded[20 : len(y) + 20] = y - 
x = np.arange(len(y)) - - peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) sigma_gaus = 8 - z = gaussian_filter1d(y_padded, sigma_gaus) - zneg_rev = -y_padded + np.max(y_padded) + #z = gaussian_filter1d(y_padded, sigma_gaus) + #peaks, _ = find_peaks(z, height=0) + #peaks = peaks - 20 + zneg_rev = np.max(y_padded) - y_padded zneg = np.zeros(len(zneg_rev) + 40) zneg[20 : len(zneg_rev) + 20] = zneg_rev zneg = gaussian_filter1d(zneg, sigma_gaus) - peaks, _ = find_peaks(z, height=0) peaks_neg, _ = find_peaks(zneg, height=0) peaks_neg = peaks_neg - 20 - 20 - peaks = peaks - 20 ##plt.plot(z) ##plt.show() - if contours_main is not None: - areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) + cx_main, cy_main = find_center_of_contours(contours_main) + cx_head, cy_head = find_center_of_contours(contours_head) - y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) + peaks_neg_new = np.append(np.insert(peaks_neg, 0, 0), textline_mask.shape[0]) + # offset from bbox of mask + peaks_neg_new += y_ref - if len(contours_header) is not None: - areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))]) - M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))] - cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))] - cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))] + # assert not len(cy_main) or np.min(peaks_neg_new) <= np.min(cy_main) and np.max(cy_main) <= np.max(peaks_neg_new) + # assert not len(cy_head) or np.min(peaks_neg_new) <= np.min(cy_head) and np.max(cy_head) <= np.max(peaks_neg_new) - x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) - x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) - - y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))]) - y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))]) - # print(cy_main,'mainy') - - peaks_neg_new = [0 + y_ref] - for iii in range(len(peaks_neg)): - peaks_neg_new.append(peaks_neg[iii] + y_ref) - peaks_neg_new.append(textline_mask.shape[0] + y_ref) - - if len(cy_main) > 0 and np.max(cy_main) > np.max(peaks_neg_new): - cy_main = np.array(cy_main) * (np.max(peaks_neg_new) / np.max(cy_main)) - 10 - if contours_main is not None: - indexer_main = np.arange(len(contours_main)) - if contours_main is not None: - len_main = len(contours_main) - else: - len_main = 0 - - matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5)) - matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header)) + matrix_of_orders = np.zeros((len(contours_main) + len(contours_head), 5), dtype=int) + matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_head)) 
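The band detection feeding into matrix_of_orders works on the horizontal projection profile of the textline mask: the inverted, smoothed profile peaks in the gaps between text rows, and those peaks (plus the top and bottom edges, shifted by y_ref into page coordinates) become the band boundaries. A self-contained sketch of that step, mirroring the sigma and padding used above (the synthetic mask is only for demonstration):

import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

def row_band_boundaries(textline_mask: np.ndarray, y_ref: int = 0) -> np.ndarray:
    y = textline_mask.sum(axis=1)                 # horizontal projection profile
    y_padded = np.zeros(len(y) + 40)
    y_padded[20:len(y) + 20] = y
    zneg_rev = np.max(y_padded) - y_padded        # invert: gaps become peaks
    zneg = np.zeros(len(zneg_rev) + 40)
    zneg[20:len(zneg_rev) + 20] = zneg_rev
    zneg = gaussian_filter1d(zneg, 8)
    peaks_neg, _ = find_peaks(zneg, height=0)
    peaks_neg = peaks_neg - 40                    # undo both paddings
    # boundaries: mask top, each gap, mask bottom -- offset by the bbox origin
    return np.append(np.insert(peaks_neg, 0, 0), textline_mask.shape[0]) + y_ref

mask = np.zeros((100, 50), dtype=np.uint8)
mask[10:30] = 1                                   # two synthetic text rows
mask[60:85] = 1
print(row_band_boundaries(mask, y_ref=200))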
matrix_of_orders[: len(contours_main), 1] = 1 matrix_of_orders[len(contours_main) :, 1] = 2 matrix_of_orders[: len(contours_main), 2] = cx_main - matrix_of_orders[len(contours_main) :, 2] = cx_header + matrix_of_orders[len(contours_main) :, 2] = cx_head matrix_of_orders[: len(contours_main), 3] = cy_main - matrix_of_orders[len(contours_main) :, 3] = cy_header + matrix_of_orders[len(contours_main) :, 3] = cy_head matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main)) - matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header)) + matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_head)) # print(peaks_neg_new,'peaks_neg_new') # print(matrix_of_orders,'matrix_of_orders') @@ -1263,70 +1240,42 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): final_indexers_sorted = [] final_types = [] final_index_type = [] - for i in range(len(peaks_neg_new) - 1): - top = peaks_neg_new[i] - down = peaks_neg_new[i + 1] - indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] - cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] - cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] - types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] - index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] + for top, bot in pairwise(peaks_neg_new): + indexes_in, types_in, cxs_in, cys_in, typed_indexes_in = \ + matrix_of_orders[(matrix_of_orders[:, 3] >= top) & + (matrix_of_orders[:, 3] < bot)].T sorted_inside = np.argsort(cxs_in) - ind_in_int = indexes_in[sorted_inside] - ind_in_type = types_of_text[sorted_inside] - ind_ind_type = index_types_of_text[sorted_inside] - for j in range(len(ind_in_int)): - final_indexers_sorted.append(int(ind_in_int[j])) - final_types.append(int(ind_in_type[j])) - final_index_type.append(int(ind_ind_type[j])) + final_indexers_sorted.extend(indexes_in[sorted_inside]) + final_types.extend(types_in[sorted_inside]) + final_index_type.extend(typed_indexes_in[sorted_inside]) ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:] - # This fix is applied if the sum of the lengths of contours and contours_h - # does not match final_indexers_sorted. However, this is not the optimal solution.. 
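The loop above then orders regions band by band: for each pair of consecutive boundaries, it selects the rows of matrix_of_orders whose center y falls inside that band and sorts them left to right by center x. Condensed to its core with the pairwise helper introduced earlier in this file (the sample centers are invented):

import numpy as np

def pairwise(iterable):
    # pairwise('ABCD') -> AB BC CD
    iterator = iter(iterable)
    a = next(iterator, None)
    for b in iterator:
        yield a, b
        a = b

boundaries = [0, 50, 100]            # band edges from the projection profile
cx = np.array([30, 10, 40, 20])      # region centers (sample data)
cy = np.array([20, 25, 70, 75])
order = []
for top, bot in pairwise(boundaries):
    in_band = np.flatnonzero((cy >= top) & (cy < bot))
    order.extend(in_band[np.argsort(cx[in_band])])  # left-to-right inside the band
print(order)                         # -> [1, 0, 3, 2]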
- if len(cy_main) + len(cy_header) == len(final_index_type): - pass - else: - indexes_missed = set(np.arange(len(cy_main) + len(cy_header))) - set(final_indexers_sorted) - for ind_missed in indexes_missed: - final_indexers_sorted.append(ind_missed) - final_types.append(1) - final_index_type.append(ind_missed) + # assert len(final_indexers_sorted) == len(contours_main) + len(contours_head) + # assert not len(final_indexers_sorted) or max(final_index_type) == max(len(contours_main) - return final_indexers_sorted, matrix_of_orders, final_types, final_index_type + return np.array(final_indexers_sorted), np.array(final_types), np.array(final_index_type) def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( img_p_in_ver, img_in_hor,num_col_classifier): #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2) - img_p_in_ver=img_p_in_ver.astype(np.uint8) - img_p_in_ver=np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(img_p_in_ver, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_lines_ver,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(img_p_in_ver, 0, 255, 0) + contours_lines_ver, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines_ver, _, x_min_main_ver, _, _, _, y_min_main_ver, y_max_main_ver, cx_main_ver = \ find_features_of_lines(contours_lines_ver) for i in range(len(x_min_main_ver)): img_p_in_ver[int(y_min_main_ver[i]): int(y_min_main_ver[i])+30, int(cx_main_ver[i])-25: - int(cx_main_ver[i])+25, 0] = 0 + int(cx_main_ver[i])+25] = 0 img_p_in_ver[int(y_max_main_ver[i])-30: int(y_max_main_ver[i]), int(cx_main_ver[i])-25: - int(cx_main_ver[i])+25, 0] = 0 + int(cx_main_ver[i])+25] = 0 - img_in_hor=img_in_hor.astype(np.uint8) - img_in_hor=np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(img_in_hor, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_lines_hor,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(img_in_hor, 0, 255, 0) + contours_lines_hor, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, _, _, _, _ = \ find_features_of_lines(contours_lines_hor) @@ -1382,35 +1331,33 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( img_p_in=img_in_hor special_separators=[] - img_p_in_ver[:,:,0][img_p_in_ver[:,:,0]==255]=1 - sep_ver_hor=img_p_in+img_p_in_ver - sep_ver_hor_cross=(sep_ver_hor[:,:,0]==2)*1 - sep_ver_hor_cross=np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2) - sep_ver_hor_cross=sep_ver_hor_cross.astype(np.uint8) - imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross) - for ii in range(len(cx_cross)): - img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0 - img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0 + img_p_in_ver[img_p_in_ver == 255] = 1 + sep_ver_hor = img_p_in + img_p_in_ver + sep_ver_hor_cross = (sep_ver_hor == 2) * 1 + _, thresh = cv2.threshold(sep_ver_hor_cross.astype(np.uint8), 0, 255, 0) + contours_cross, _ = 
cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + center_cross = np.array(find_center_of_contours(contours_cross), dtype=int) + for cx, cy in center_cross.T: + img_p_in[cy - 30: cy + 30, cx + 5: cx + 40] = 0 + img_p_in[cy - 30: cy + 30, cx - 40: cx - 4] = 0 else: img_p_in=np.copy(img_in_hor) special_separators=[] - return img_p_in[:,:,0], special_separators + return img_p_in, special_separators def return_points_with_boundies(peaks_neg_fin, first_point, last_point): - peaks_neg_tot = [first_point] + peaks_neg_tot = [] + peaks_neg_tot.append(first_point) for ii in range(len(peaks_neg_fin)): peaks_neg_tot.append(peaks_neg_fin[ii]) peaks_neg_tot.append(last_point) return peaks_neg_tot -def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): +def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, label_lines, contours_h=None): t_ins_c0 = time.time() - separators_closeup= (region_pre_p[:, :, :] == pixel_lines) * 1 - separators_closeup[0:110,:,:]=0 - separators_closeup[separators_closeup.shape[0]-150:,:,:]=0 + separators_closeup=( (region_pre_p[:,:]==label_lines))*1 + separators_closeup[0:110,:]=0 + separators_closeup[separators_closeup.shape[0]-150:,:]=0 kernel = np.ones((5,5),np.uint8) separators_closeup=separators_closeup.astype(np.uint8) @@ -1422,15 +1369,11 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, separators_closeup_n=separators_closeup_n.astype(np.uint8) separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) - separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] + separators_closeup_n_binary[:,:]=separators_closeup_n[:,:] separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 - gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) - gray_early=gray_early.astype(np.uint8) - imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY) - ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0) - - contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh_e = cv2.threshold(separators_closeup_n_binary, 0, 255, 0) + contours_line_e, _ = cv2.findContours(thresh_e.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) _, dist_xe, _, _, _, _, y_min_main, y_max_main, _ = \ find_features_of_lines(contours_line_e) dist_ye = y_max_main - y_min_main @@ -1440,14 +1383,12 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, cnts_hor_e=[] for ce in args_hor_e: cnts_hor_e.append(contours_line_e[ce]) - figs_e=np.zeros(thresh_e.shape) - figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1)) - separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=(0,0,0)) + separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=0) gray = cv2.bitwise_not(separators_closeup_n_binary) gray=gray.astype(np.uint8) - bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, \ cv2.THRESH_BINARY, 15, -2) horizontal = np.copy(bw) vertical = np.copy(bw) @@ -1463,7 +1404,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, kernel = np.ones((5,5),np.uint8) horizontal = cv2.dilate(horizontal,kernel,iterations = 2) horizontal = cv2.erode(horizontal,kernel,iterations = 2) - horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, 
color=(255,255,255)) + horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=255) rows = vertical.shape[0] verticalsize = rows // 30 @@ -1481,13 +1422,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, separators_closeup_new[:,:][vertical[:,:]!=0]=1 separators_closeup_new[:,:][horizontal[:,:]!=0]=1 - vertical=np.repeat(vertical[:, :, np.newaxis], 3, axis=2) - vertical=vertical.astype(np.uint8) - - imgray = cv2.cvtColor(vertical, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(vertical, 0, 255, 0) + contours_line_vers, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ find_features_of_lines(contours_line_vers) @@ -1502,11 +1438,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, dist_y_ver=y_max_main_ver-y_min_main_ver len_y=separators_closeup.shape[0]/3.0 - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) - horizontal=horizontal.astype(np.uint8) - imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_line_hors,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(horizontal, 0, 255, 0) + contours_line_hors, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ find_features_of_lines(contours_line_hors) @@ -1583,7 +1516,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, args_cy_splitter=np.argsort(cy_main_splitters) cy_main_splitters_sort=cy_main_splitters[args_cy_splitter] - splitter_y_new= [0] + splitter_y_new=[] + splitter_y_new.append(0) for i in range(len(cy_main_splitters_sort)): splitter_y_new.append( cy_main_splitters_sort[i] ) splitter_y_new.append(region_pre_p.shape[0]) @@ -1598,7 +1532,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, peaks_neg_fin_fin=[] for itiles in args_big_parts: regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]): - int(splitter_y_new[itiles+1]),:,0] + int(splitter_y_new[itiles+1]),:] try: num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0) @@ -1620,12 +1554,19 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, def return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, - num_col_classifier, erosion_hurts, tables, right2left_readingorder): + num_col_classifier, erosion_hurts, tables, + right2left_readingorder, + logger=None): if right2left_readingorder: regions_without_separators = cv2.flip(regions_without_separators,1) + if logger is None: + logger = getLogger(__package__) + logger.debug('enter return_boxes_of_images_by_order_of_reading_new') + boxes=[] peaks_neg_tot_tables = [] + splitter_y_new = np.array(splitter_y_new, dtype=int) for i in range(len(splitter_y_new)-1): #print(splitter_y_new[i],splitter_y_new[i+1]) matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) & @@ -1638,26 +1579,22 @@ def return_boxes_of_images_by_order_of_reading_new( # 0.1 * 
(np.abs(splitter_y_new[i+1]-splitter_y_new[i]))): if True: try: - if erosion_hurts: - num_col, peaks_neg_fin = find_num_col( - regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], - num_col_classifier, tables, multiplier=6.) - else: - num_col, peaks_neg_fin = find_num_col( - regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], - num_col_classifier, tables, multiplier=7.) + num_col, peaks_neg_fin = find_num_col( + regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :], + num_col_classifier, tables, multiplier=6. if erosion_hurts else 7.) except: peaks_neg_fin=[] num_col = 0 try: - peaks_neg_fin_org=np.copy(peaks_neg_fin) if (len(peaks_neg_fin)+1)=2 or there_is_sep_with_child==1))): try: - y_grenze=int(splitter_y_new[i])+300 + y_grenze = splitter_y_new[i] + 300 #check if there is a big separator in this y_mains_sep_ohne_grenzen args_early_ys=np.arange(len(y_type_2)) #print(args_early_ys,'args_early_ys') - #print(int(splitter_y_new[i]),int(splitter_y_new[i+1])) + #print(splitter_y_new[i], splitter_y_new[i+1]) - x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) & + x_starting_up = x_starting[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) & + x_ending_up = x_ending[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) & + y_type_2_up = y_type_2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + y_diff_type_2_up = y_diff_type_2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) & + args_up = args_early_ys[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] if len(y_type_2_up) > 0: y_main_separator_up = y_type_2_up [(x_starting_up==0) & @@ -1769,8 +1703,8 @@ def return_boxes_of_images_by_order_of_reading_new( args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) )) #print(args_to_be_kept,'args_to_be_kept') boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], - int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))]) - splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0] + splitter_y_new[i], y_diff_main_separator_up.max()]) + splitter_y_new[i] = y_diff_main_separator_up.max() #print(splitter_y_new[i],'splitter_y_new[i]') y_type_2 = y_type_2[args_to_be_kept] @@ -1779,29 +1713,28 @@ def return_boxes_of_images_by_order_of_reading_new( y_diff_type_2 = y_diff_type_2[args_to_be_kept] #print('galdiha') - y_grenze=int(splitter_y_new[i])+200 + y_grenze = splitter_y_new[i] + 200 args_early_ys2=np.arange(len(y_type_2)) - y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) & + y_type_2_up=y_type_2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) & + x_starting_up=x_starting[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) & + x_ending_up=x_ending[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + y_diff_type_2_up=y_diff_type_2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) & + args_up2=args_early_ys2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] #print(y_type_2_up,x_starting_up,x_ending_up,'didid') - nodes_in 
= [] + nodes_in = set() for ij in range(len(x_starting_up)): - nodes_in = nodes_in + list(range(int(x_starting_up[ij]), - int(x_ending_up[ij]))) - nodes_in = np.unique(nodes_in) + nodes_in.update(range(x_starting_up[ij], + x_ending_up[ij])) #print(nodes_in,'nodes_in') - if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): + if nodes_in == set(range(len(peaks_neg_tot)-1)): pass - elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)): + elif nodes_in == set(range(1, len(peaks_neg_tot)-1)): pass else: #print('burdaydikh') @@ -1816,17 +1749,16 @@ def return_boxes_of_images_by_order_of_reading_new( pass #print('burdaydikh2') elif len(y_diff_main_separator_up)==0: - nodes_in = [] + nodes_in = set() for ij in range(len(x_starting_up)): - nodes_in = nodes_in + list(range(int(x_starting_up[ij]), - int(x_ending_up[ij]))) - nodes_in = np.unique(nodes_in) + nodes_in.update(range(x_starting_up[ij], + x_ending_up[ij])) #print(nodes_in,'nodes_in2') #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))') - if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): + if nodes_in == set(range(len(peaks_neg_tot)-1)): pass - elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)): + elif nodes_in == set(range(1,len(peaks_neg_tot)-1)): pass else: #print('burdaydikh') @@ -1851,26 +1783,25 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_by_order=[] if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1: if reading_order_type==1: - y_lines_by_order.append(int(splitter_y_new[i])) + y_lines_by_order.append(splitter_y_new[i]) x_start_by_order.append(0) x_end_by_order.append(len(peaks_neg_tot)-2) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(int(x_start_without_mother[dj]), - int(x_end_without_mother[dj]))) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(len(peaks_neg_tot)-1) - columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) - ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + columns_covered_by_mothers.update( + range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_not_covered = list(all_columns - columns_covered_by_mothers) + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + + len(x_start_without_mother), + dtype=int) * splitter_y_new[i]) + ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, np.array(columns_not_covered, int)) x_starting = np.append(x_starting, x_start_without_mother) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1) x_ending = np.append(x_ending, x_end_without_mother) ind_args=np.arange(len(y_type_2)) @@ -1899,42 +1830,39 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_by_order.append(x_end_column_sort[ii]-1) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - 
columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(int(x_start_without_mother[dj]), - int(x_end_without_mother[dj]))) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(len(peaks_neg_tot)-1) - columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) - ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + columns_covered_by_mothers.update( + range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_not_covered = list(all_columns - columns_covered_by_mothers) + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother), + dtype=int) * splitter_y_new[i]) + ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, np.array(columns_not_covered, int)) x_starting = np.append(x_starting, x_start_without_mother) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1) x_ending = np.append(x_ending, x_end_without_mother) - columns_covered_by_with_child_no_mothers = [] + columns_covered_by_with_child_no_mothers = set() for dj in range(len(x_end_with_child_without_mother)): - columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ - list(range(int(x_start_with_child_without_mother[dj]), - int(x_end_with_child_without_mother[dj]))) - columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) - - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers)) + columns_covered_by_with_child_no_mothers.update( + range(x_start_with_child_without_mother[dj], + x_end_with_child_without_mother[dj])) + columns_not_covered_child_no_mother = list( + all_columns - columns_covered_by_with_child_no_mothers) #indexes_to_be_spanned=[] for i_s in range(len(x_end_with_child_without_mother)): columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s]) columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother) ind_args = np.arange(len(y_type_2)) - x_end_with_child_without_mother = np.array(x_end_with_child_without_mother) - x_start_with_child_without_mother = np.array(x_start_with_child_without_mother) + x_end_with_child_without_mother = np.array(x_end_with_child_without_mother, int) + x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int) for i_s_nc in columns_not_covered_child_no_mother: if i_s_nc in x_start_with_child_without_mother: - x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0] + x_end_biggest_column = \ + x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0] args_all_biggest_lines = ind_args[(x_starting==i_s_nc) & (x_ending==x_end_biggest_column)] y_column_nc = y_type_2[args_all_biggest_lines] @@ -1944,7 +1872,7 @@ def return_boxes_of_images_by_order_of_reading_new( for i_c in 
range(len(y_column_nc)): if i_c==(len(y_column_nc)-1): ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & - (y_type_2=i_s_nc) & (x_ending<=x_end_biggest_column)] else: @@ -1960,21 +1888,19 @@ def return_boxes_of_images_by_order_of_reading_new( if len(x_diff_all_between_nm_wc)>0: biggest=np.argmax(x_diff_all_between_nm_wc) - columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_starting_all_between_nm_wc)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(int(x_starting_all_between_nm_wc[dj]), - int(x_ending_all_between_nm_wc[dj]))) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(i_s_nc, x_end_biggest_column) - columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) + columns_covered_by_mothers.update( + range(x_starting_all_between_nm_wc[dj], + x_ending_all_between_nm_wc[dj])) + child_columns = set(range(i_s_nc, x_end_biggest_column)) + columns_not_covered = list(child_columns - columns_covered_by_mothers) should_longest_line_be_extended=0 if (len(x_diff_all_between_nm_wc) > 0 and - set(list(range(int(x_starting_all_between_nm_wc[biggest]), - int(x_ending_all_between_nm_wc[biggest]))) + - list(columns_not_covered)) != set(all_columns)): + set(list(range(x_starting_all_between_nm_wc[biggest], + x_ending_all_between_nm_wc[biggest])) + + list(columns_not_covered)) != child_columns): should_longest_line_be_extended=1 index_lines_so_close_to_top_separator = \ np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) & @@ -1984,9 +1910,12 @@ def return_boxes_of_images_by_order_of_reading_new( np.array(list(set(list(range(len(y_all_between_nm_wc)))) - set(list(index_lines_so_close_to_top_separator)))) if len(indexes_remained_after_deleting_closed_lines) > 0: - y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + y_all_between_nm_wc = \ + y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_starting_all_between_nm_wc = \ + x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_ending_all_between_nm_wc = \ + x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc) @@ -1998,11 +1927,11 @@ def return_boxes_of_images_by_order_of_reading_new( x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest]) x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest]) except: - pass + logger.exception("cannot append") y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered)) - x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered) - x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int)) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1) ind_args_between=np.arange(len(x_ending_all_between_nm_wc)) for column in 
range(int(i_s_nc), int(x_end_biggest_column)): @@ -2071,52 +2000,50 @@ def return_boxes_of_images_by_order_of_reading_new( if len(y_in_cols)>0: y_down=np.min(y_in_cols) else: - y_down=[int(splitter_y_new[i+1])][0] + y_down=splitter_y_new[i+1] #print(y_itself,'y_itself') boxes.append([peaks_neg_tot[column], peaks_neg_tot[column+1], y_itself, y_down]) except: + logger.exception("cannot assign boxes") boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], - int(splitter_y_new[i]), int(splitter_y_new[i+1])]) + splitter_y_new[i], splitter_y_new[i+1]]) else: y_lines_by_order=[] x_start_by_order=[] x_end_by_order=[] if len(x_starting)>0: - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_covered_by_lines_covered_more_than_2col = [] + columns_covered_by_lines_covered_more_than_2col = set() for dj in range(len(x_starting)): - if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns): - pass - else: - columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \ - list(range(int(x_starting[dj]),int(x_ending[dj]) )) - columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col)) - columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col)) + if set(range(x_starting[dj], x_ending[dj])) != all_columns: + columns_covered_by_lines_covered_more_than_2col.update( + range(x_starting[dj], x_ending[dj])) + columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) - ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + 1, + dtype=int) * splitter_y_new[i]) + ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) + x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) if len(new_main_sep_y) > 0: x_starting = np.append(x_starting, 0) - x_ending = np.append(x_ending, len(peaks_neg_tot)-1) + x_ending = np.append(x_ending, len(peaks_neg_tot) - 1) else: x_starting = np.append(x_starting, x_starting[0]) x_ending = np.append(x_ending, x_ending[0]) else: - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_not_covered = list(set(all_columns)) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) - ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + columns_not_covered = list(all_columns) + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered), + dtype=int) * splitter_y_new[i]) + ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) + x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) - 
ind_args=np.array(range(len(y_type_2))) + ind_args = np.arange(len(y_type_2)) for column in range(len(peaks_neg_tot)-1): #print(column,'column') @@ -2148,7 +2075,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_start_itself=x_start_copy.pop(il) x_end_itself=x_end_copy.pop(il) - for column in range(int(x_start_itself), int(x_end_itself)+1): + for column in range(x_start_itself, x_end_itself+1): #print(column,'cols') y_in_cols=[] for yic in range(len(y_copy)): @@ -2162,7 +2089,7 @@ def return_boxes_of_images_by_order_of_reading_new( if len(y_in_cols)>0: y_down=np.min(y_in_cols) else: - y_down=[int(splitter_y_new[i+1])][0] + y_down=splitter_y_new[i+1] #print(y_itself,'y_itself') boxes.append([peaks_neg_tot[column], peaks_neg_tot[column+1], @@ -2184,9 +2111,10 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_new = regions_without_separators.shape[1] - boxes[i][0] boxes[i][0] = x_start_new boxes[i][1] = x_end_new - return boxes, peaks_neg_tot_tables_new - else: - return boxes, peaks_neg_tot_tables + peaks_neg_tot_tables = peaks_neg_tot_tables_new + + logger.debug('exit return_boxes_of_images_by_order_of_reading_new') + return boxes, peaks_neg_tot_tables def is_image_filename(fname: str) -> bool: return fname.lower().endswith(('.jpg', diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 0be8879..f304db2 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,7 +1,15 @@ +from typing import Sequence, Union +from numbers import Number from functools import partial +import itertools + import cv2 import numpy as np -from shapely import geometry +from scipy.sparse.csgraph import minimum_spanning_tree +from shapely.geometry import Polygon, LineString +from shapely.geometry.polygon import orient +from shapely import set_precision +from shapely.ops import unary_union, nearest_points from .rotate import rotate_image, rotation_image_new @@ -28,38 +36,31 @@ def find_contours_mean_y_diff(contours_main): return np.mean(np.diff(np.sort(np.array(cy_main)))) def get_text_region_boxes_by_given_contours(contours): - boxes = [] - contours_new = [] - for jj in range(len(contours)): - box = cv2.boundingRect(contours[jj]) - boxes.append(box) - contours_new.append(contours[jj]) + return [cv2.boundingRect(contour) + for contour in contours] - return boxes, contours_new - -def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): +def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): found_polygons_early = [] - for jv,c in enumerate(contours): - if len(c) < 3: # A polygon cannot have less than 3 points + for jv, contour in enumerate(contours): + if len(contour) < 3: # A polygon cannot have less than 3 points continue - polygon = geometry.Polygon([point[0] for point in c]) + polygon = contour2polygon(contour, dilate=dilate) area = polygon.area if (area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1): - found_polygons_early.append(np.array([[point] - for point in polygon.exterior.coords], dtype=np.uint)) + found_polygons_early.append(polygon2contour(polygon)) return found_polygons_early -def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): +def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): found_polygons_early = [] - for jv,c in enumerate(contours): - if len(c) < 3: # A polygon cannot have less than 3 points + 
for jv, contour in enumerate(contours): + if len(contour) < 3: # A polygon cannot have less than 3 points continue - polygon = geometry.Polygon([point[0] for point in c]) - # area = cv2.contourArea(c) + polygon = contour2polygon(contour, dilate=dilate) + # area = cv2.contourArea(contour) area = polygon.area ##print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area @@ -68,66 +69,41 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m area <= max_area * np.prod(image.shape[:2]) and # hierarchy[0][jv][3]==-1 True): - # print(c[0][0][1]) - found_polygons_early.append(np.array([[point] - for point in polygon.exterior.coords], dtype=np.int32)) + # print(contour[0][0][1]) + found_polygons_early.append(polygon2contour(polygon)) return found_polygons_early -def find_new_features_of_contours(contours_main): - areas_main = np.array([cv2.contourArea(contours_main[j]) - for j in range(len(contours_main))]) - M_main = [cv2.moments(contours_main[j]) - for j in range(len(contours_main))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) - for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) - for j in range(len(M_main))] - try: - x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) - for j in range(len(contours_main))]) - argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) - for j in range(len(contours_main))]) - x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] - for j in range(len(contours_main))]) - y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] - for j in range(len(contours_main))]) - x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) - for j in range(len(contours_main))]) - y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) - for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) - for j in range(len(contours_main))]) - except: - x_min_main = np.array([np.min(contours_main[j][:, 0]) - for j in range(len(contours_main))]) - argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) - for j in range(len(contours_main))]) - x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] - for j in range(len(contours_main))]) - y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] - for j in range(len(contours_main))]) - x_max_main = np.array([np.max(contours_main[j][:, 0]) - for j in range(len(contours_main))]) - y_min_main = np.array([np.min(contours_main[j][:, 1]) - for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 1]) - for j in range(len(contours_main))]) - # dis_x=np.abs(x_max_main-x_min_main) +def find_center_of_contours(contours): + moments = [cv2.moments(contour) for contour in contours] + cx = [feat["m10"] / (feat["m00"] + 1e-32) + for feat in moments] + cy = [feat["m01"] / (feat["m00"] + 1e-32) + for feat in moments] + return cx, cy - return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin +def find_new_features_of_contours(contours): + # areas = np.array([cv2.contourArea(contour) for contour in contours]) + cx, cy = find_center_of_contours(contours) + slice_x = np.index_exp[:, 0, 0] + slice_y = np.index_exp[:, 0, 1] + if any(contour.ndim < 3 for contour in contours): + slice_x = np.index_exp[:, 0] + slice_y = np.index_exp[:, 1] + x_min = np.array([np.min(contour[slice_x]) for contour in contours]) + x_max = np.array([np.max(contour[slice_x]) for contour in 
contours]) + y_min = np.array([np.min(contour[slice_y]) for contour in contours]) + y_max = np.array([np.max(contour[slice_y]) for contour in contours]) + # dis_x=np.abs(x_max-x_min) + y_corr_x_min = np.array([contour[np.argmin(contour[slice_x])][slice_y[1:]] + for contour in contours]) -def find_features_of_contours(contours_main): - areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] - cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] - x_min_main=np.array([np.min(contours_main[j][:,0,0]) for j in range(len(contours_main))]) - x_max_main=np.array([np.max(contours_main[j][:,0,0]) for j in range(len(contours_main))]) + return cx, cy, x_min, x_max, y_min, y_max, y_corr_x_min - y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) - y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) +def find_features_of_contours(contours): + y_min = np.array([np.min(contour[:,0,1]) for contour in contours]) + y_max = np.array([np.max(contour[:,0,1]) for contour in contours]) - return y_min_main, y_max_main + return y_min, y_max def return_parent_contours(contours, hierarchy): contours_parent = [contours[i] @@ -135,16 +111,13 @@ def return_parent_contours(contours, hierarchy): if hierarchy[0][i][3] == -1] return contours_parent -def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): +def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): # pixels of images are identified by 5 - if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + if region_pre_p.ndim == 3: + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + cnts_images = (region_pre_p[:, :] == label) * 1 + _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) @@ -153,13 +126,11 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): return contours_imgs def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1)) + img_copy = np.zeros(img.shape[:2], dtype=np.uint8) + img_copy = cv2.fillPoly(img_copy, pts=[contour], color=1) img_copy = rotation_image_new(img_copy, -slope_first) - img_copy = img_copy.astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(img_copy, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) @@ -182,8 +153,8 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): cnts_org = [] # print(cnts,'cnts') for i in range(len(cnts)): - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) + img_copy = np.zeros(img.shape[:2], dtype=np.uint8) + img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=1) 
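
The centroid helper factored out above (`find_center_of_contours`) replaces the duplicated moment arithmetic that `find_new_features_of_contours` used to carry inline. A minimal self-contained sketch of the same computation — the helper name `contour_centers` and the demo square are illustrative only, not part of this patch:

```python
import cv2
import numpy as np

def contour_centers(contours):
    # Spatial image moments: m10/m00 and m01/m00 are the x/y centroids;
    # the tiny epsilon guards against degenerate zero-area contours.
    moments = [cv2.moments(contour) for contour in contours]
    cx = [m["m10"] / (m["m00"] + 1e-32) for m in moments]
    cy = [m["m01"] / (m["m00"] + 1e-32) for m in moments]
    return cx, cy

# 10x10 axis-aligned square in OpenCV contour layout (N, 1, 2)
square = np.array([[[0, 0]], [[10, 0]], [[10, 10]], [[0, 10]]], dtype=np.int32)
print(contour_centers([square]))  # -> ([5.0], [5.0])
```
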
# plt.imshow(img_copy) # plt.show() @@ -194,9 +165,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): # plt.imshow(img_copy) # plt.show() - img_copy = img_copy.astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(img_copy, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) @@ -213,12 +182,11 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): interpolation=cv2.INTER_NEAREST) cnts_org = [] for cnt in cnts: - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[(cnt / zoom).astype(int)], color=(1, 1, 1)) + img_copy = np.zeros(img.shape[:2], dtype=np.uint8) + img_copy = cv2.fillPoly(img_copy, pts=[cnt // zoom], color=1) img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(img_copy, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) @@ -228,14 +196,13 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): return cnts_org def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first, confidence_matrix): - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) - confidence_matrix_mapped_with_contour = confidence_matrix * img_copy[:,:,0] - confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy[:,:,0])) + img_copy = np.zeros(img.shape[:2], dtype=np.uint8) + img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=1) + confidence_matrix_mapped_with_contour = confidence_matrix * img_copy + confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy)) img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(img_copy, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if len(cont_int)==0: @@ -246,34 +213,27 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) return cont_int[0], index_r_con, confidence_contour -def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map): +def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix): if not len(cnts): - return [], [] - - confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) - img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) - ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) - #cnts = cnts/2 - cnts = [(i/6).astype(int) for i in cnts] - results = map(partial(do_back_rotation_and_get_cnt_back, - img=img, - slope_first=slope_first, - confidence_matrix=confidence_matrix, - ), - cnts, range(len(cnts))) - contours, indexes, conf_contours = tuple(zip(*results)) - return [i*6 for i in contours], list(conf_contours) + return [] -def 
return_contours_of_interested_textline(region_pre_p, pixel): + confidence_matrix = cv2.resize(confidence_matrix, + (img.shape[1] // 6, img.shape[0] // 6), + interpolation=cv2.INTER_NEAREST) + confs = [] + for cnt in cnts: + cnt_mask = np.zeros(confidence_matrix.shape) + cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0) + confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) + return confs + +def return_contours_of_interested_textline(region_pre_p, label): # pixels of images are identified by 5 - if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + if region_pre_p.ndim == 3: + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + cnts_images = (region_pre_p[:, :] == label) * 1 + _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) @@ -283,51 +243,123 @@ def return_contours_of_interested_textline(region_pre_p, pixel): def return_contours_of_image(image): if len(image.shape) == 2: - image = np.repeat(image[:, :, np.newaxis], 3, axis=2) image = image.astype(np.uint8) + imgray = image else: image = image.astype(np.uint8) - imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return contours, hierarchy -def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): - # pixels of images are identified by 5 - if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 - else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) +def dilate_textline_contours(all_found_textline_polygons): + return [[polygon2contour(contour2polygon(contour, dilate=6)) + for contour in region] + for region in all_found_textline_polygons] - contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables( - thresh, contours_imgs, hierarchy, max_area=1, min_area=min_size) +def dilate_textregion_contours(all_found_textline_polygons): + return [polygon2contour(contour2polygon(contour, dilate=6)) + for contour in all_found_textline_polygons] - return contours_imgs +def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0): + polygon = Polygon([point[0] for point in contour]) + if dilate: + polygon = polygon.buffer(dilate) + if polygon.geom_type == 'GeometryCollection': + # heterogeneous result: filter zero-area shapes (LineString, Point) + polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0]) + if polygon.geom_type == 'MultiPolygon': + # homogeneous result: construct convex hull to connect + polygon = join_polygons(polygon.geoms) + 
return make_valid(polygon) -def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): - # pixels of images are identified by 5 - if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 - else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) +def polygon2contour(polygon: Polygon) -> np.ndarray: + polygon = np.array(polygon.exterior.coords[:-1], dtype=int) + return np.maximum(0, polygon).astype(int)[:, np.newaxis] - contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables( - thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area) +def make_intersection(poly1, poly2): + interp = poly1.intersection(poly2) + # post-process + if interp.is_empty or interp.area == 0.0: + return None + if interp.geom_type == 'GeometryCollection': + # heterogeneous result: filter zero-area shapes (LineString, Point) + interp = unary_union([geom for geom in interp.geoms if geom.area > 0]) + if interp.geom_type == 'MultiPolygon': + # homogeneous result: construct convex hull to connect + interp = join_polygons(interp.geoms) + assert interp.geom_type == 'Polygon', interp.wkt + interp = make_valid(interp) + return interp - img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3)) - img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1)) - - return img_ret[:, :, 0] +def make_valid(polygon: Polygon) -> Polygon: + """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" + def isint(x): + return isinstance(x, int) or int(x) == x + # make sure rounding does not invalidate + if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0: + polygon = Polygon(np.round(polygon.exterior.coords)) + points = list(polygon.exterior.coords[:-1]) + # try by re-arranging points + for split in range(1, len(points)): + if polygon.is_valid or polygon.simplify(polygon.area).is_valid: + break + # simplification may not be possible (at all) due to ordering + # in that case, try another starting point + polygon = Polygon(points[-split:]+points[:-split]) + # try by simplification + for tolerance in range(int(polygon.area + 1.5)): + if polygon.is_valid: + break + # simplification may require a larger tolerance + polygon = polygon.simplify(tolerance + 1) + # try by enlarging + for tolerance in range(1, int(polygon.area + 2.5)): + if polygon.is_valid: + break + # enlargement may require a larger tolerance + polygon = polygon.buffer(tolerance) + assert polygon.is_valid, polygon.wkt + return polygon +def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon: + """construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points""" + # ensure input polygons are simply typed and all oriented equally + polygons = [orient(poly) + for poly in itertools.chain.from_iterable( + [poly.geoms + if poly.geom_type in ['MultiPolygon', 'GeometryCollection'] + else [poly] + for poly in polygons])] + npoly = len(polygons) + if npoly == 1: + return polygons[0] + # find min-dist path through all polygons (travelling salesman) + pairs = 
itertools.combinations(range(npoly), 2) + dists = np.zeros((npoly, npoly), dtype=float) + for i, j in pairs: + dist = polygons[i].distance(polygons[j]) + if dist < 1e-5: + dist = 1e-5 # if pair merely touches, we still need to get an edge + dists[i, j] = dist + dists[j, i] = dist + dists = minimum_spanning_tree(dists, overwrite=True) + # add bridge polygons (where necessary) + for prevp, nextp in zip(*dists.nonzero()): + prevp = polygons[prevp] + nextp = polygons[nextp] + nearest = nearest_points(prevp, nextp) + bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1) + polygons.append(bridgep) + jointp = unary_union(polygons) + if jointp.geom_type == 'MultiPolygon': + jointp = unary_union(jointp.geoms) + assert jointp.geom_type == 'Polygon', jointp.wkt + # follow-up calculations will necessarily be integer; + # so anticipate rounding here and then ensure validity + jointp2 = set_precision(jointp, 1.0) + if jointp2.geom_type != 'Polygon' or not jointp2.is_valid: + jointp2 = Polygon(np.round(jointp.exterior.coords)) + jointp2 = make_valid(jointp2) + assert jointp2.geom_type == 'Polygon', jointp2.wkt + return jointp2 diff --git a/src/eynollah/utils/drop_capitals.py b/src/eynollah/utils/drop_capitals.py index 67547d3..9f82fac 100644 --- a/src/eynollah/utils/drop_capitals.py +++ b/src/eynollah/utils/drop_capitals.py @@ -1,6 +1,7 @@ import numpy as np import cv2 from .contour import ( + find_center_of_contours, find_new_features_of_contours, return_contours_of_image, return_parent_contours, @@ -22,8 +23,8 @@ def adhere_drop_capital_region_into_corresponding_textline( ): # print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape') # print(all_found_textline_polygons[3]) - cx_m, cy_m, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - cx_h, cy_h, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_h) + cx_m, cy_m = find_center_of_contours(contours_only_text_parent) + cx_h, cy_h = find_center_of_contours(contours_only_text_parent_h) cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contours(polygons_of_drop_capitals) img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) @@ -89,9 +90,9 @@ def adhere_drop_capital_region_into_corresponding_textline( region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1 # print(region_final,'region_final') - # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -153,9 +154,9 @@ def adhere_drop_capital_region_into_corresponding_textline( # areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))]) - # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = 
find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -208,7 +209,7 @@ def adhere_drop_capital_region_into_corresponding_textline( try: # print(all_found_textline_polygons[j_cont][0]) - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -261,7 +262,7 @@ def adhere_drop_capital_region_into_corresponding_textline( else: pass - ##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + ##cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) ###print(all_box_coord[j_cont]) ###print(cx_t) ###print(cy_t) @@ -315,9 +316,9 @@ def adhere_drop_capital_region_into_corresponding_textline( region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1 # print(region_final,'region_final') - # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -375,12 +376,12 @@ def adhere_drop_capital_region_into_corresponding_textline( # areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))]) - # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(cx_t,'print') try: # print(all_found_textline_polygons[j_cont][0]) - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -453,7 +454,7 @@ def adhere_drop_capital_region_into_corresponding_textline( #####try: #####if len(contours_new_parent)==1: ######print(all_found_textline_polygons[j_cont][0]) - #####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont]) + #####cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[j_cont]) ######print(all_box_coord[j_cont]) ######print(cx_t) ######print(cy_t) diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index 9ec0737..eaf0048 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -99,6 +99,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve except: point_left=first_nonzero + if point_left == first_nonzero and point_right == last_nonzero: + return text_regions if point_right>=mask_marginals.shape[1]: diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 32114aa..22ef00d 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -17,9 +17,12 @@ from .contour import ( return_contours_of_interested_textline, find_contours_mean_y_diff, ) +from .shm import share_ndarray, wrap_ndarray_shared from . 
import ( find_num_col_deskew, crop_image_inside_box, + box2rect, + box2slice, ) def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): @@ -64,7 +67,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -75,11 +79,14 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : - arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -135,13 +142,12 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): rotation_matrix) def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): - (h, w) = img_patch.shape[:2] + h, w = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) x_d = M[0, 2] y_d = M[1, 2] - thetha = thetha / 180. 
* np.pi - rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]]) + rotation_matrix = M[:2, :2] contour_text_interest_copy = contour_text_interest.copy() x_cont = contour_text_interest[:, 0, 0] @@ -176,7 +182,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -236,7 +243,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): try: neg_peaks_max=np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -313,23 +321,36 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down =y_max_cont-1 + ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down =y_max_cont-1 + ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) point_down_narrow = peaks[jj] + first_nonzero + int( - 1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + 1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) else: dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: - point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) + 
##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) point_down_narrow = peaks[jj] + first_nonzero + int( 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) @@ -338,7 +359,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down_narrow = img_patch.shape[0] - 2 - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -465,7 +488,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -540,7 +564,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -610,7 +635,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -686,30 +712,50 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: - point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = x_max_cont - 1 + ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = x_max_cont - 1 + ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) + #point_up + # np.max(y_cont) + #peaks[jj] + first_nonzero + int(1.4 * 
dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) else: dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: - point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) if point_down_narrow >= img_patch.shape[0]: point_down_narrow = img_patch.shape[0] - 2 - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] @@ -798,7 +844,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -863,7 +910,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -947,7 +995,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -960,8 +1009,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): if 
len(arg_diff_cluster) > 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: + arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -1011,7 +1063,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): try: neg_peaks_max = np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -1174,7 +1227,8 @@ def separate_lines_new_inside_tiles(img_path, thetha): if diff_peaks[i] > cut_off: if not np.isnan(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) - forest = [peaks_neg[i + 1]] + forest = [] + forest.append(peaks_neg[i + 1]) if i == (len(peaks_neg) - 1): if not np.isnan(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) @@ -1194,7 +1248,8 @@ def separate_lines_new_inside_tiles(img_path, thetha): if diff_peaks_pos[i] > cut_off: if not np.isnan(forest[np.argmax(z[forest])]): peaks_pos_true.append(forest[np.argmax(z[forest])]) - forest = [peaks[i + 1]] + forest = [] + forest.append(peaks[i + 1]) if i == (len(peaks) - 1): if not np.isnan(forest[np.argmax(z[forest])]): peaks_pos_true.append(forest[np.argmax(z[forest])]) @@ -1246,19 +1301,16 @@ def separate_lines_new_inside_tiles(img_path, thetha): def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines): kernel = np.ones((5, 5), np.uint8) - pixel = 255 + label = 255 min_area = 0 max_area = 1 - if len(img_patch.shape) == 3: - cnts_images = (img_patch[:, :, 0] == pixel) * 1 + if img_patch.ndim == 3: + cnts_images = (img_patch[:, :, 0] == label) * 1 else: - cnts_images = (img_patch[:, :] == pixel) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + cnts_images = (img_patch[:, :] == label) * 1 + _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0) + contours_imgs, hierarchy = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) contours_imgs = filter_contours_area_of_image_tables(thresh, @@ -1266,14 +1318,12 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i max_area=max_area, min_area=min_area) cont_final = [] for i in range(len(contours_imgs)): - img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3)) - img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255)) - 
img_contour = img_contour.astype(np.uint8) + img_contour = np.zeros(cnts_images.shape[:2], dtype=np.uint8) + img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=255) img_contour = cv2.dilate(img_contour, kernel, iterations=4) - imgrayrot = cv2.cvtColor(img_contour, cv2.COLOR_BGR2GRAY) - _, threshrot = cv2.threshold(imgrayrot, 0, 255, 0) - contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + _, threshrot = cv2.threshold(img_contour, 0, 255, 0) + contours_text_rot, _ = cv2.findContours(threshrot.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) ##contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[ ##0] @@ -1285,84 +1335,81 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i return None, cont_final -def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): - textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 - textline_mask = textline_mask.astype(np.uint8) +def textline_contours_postprocessing(textline_mask, slope, + contour_text_interest, box_ind, + add_boxes_coor_into_textlines=False): + textline_mask = textline_mask * 255 kernel = np.ones((5, 5), np.uint8) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel) textline_mask = cv2.erode(textline_mask, kernel, iterations=2) # textline_mask = cv2.erode(textline_mask, kernel, iterations=1) - try: - x_help = 30 - y_help = 2 - textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), - textline_mask.shape[1] + int(2 * x_help), 3)) - textline_mask_help[y_help : y_help + textline_mask.shape[0], - x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) + x_help = 30 + y_help = 2 - dst = rotate_image(textline_mask_help, slope) - dst = dst[:, :, 0] - dst[dst != 0] = 1 + textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), + textline_mask.shape[1] + int(2 * x_help))) + textline_mask_help[y_help : y_help + textline_mask.shape[0], + x_help : x_help + textline_mask.shape[1]] = np.copy(textline_mask[:, :]) - # if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3: - # plt.imshow(dst) - # plt.show() + dst = rotate_image(textline_mask_help, slope) + dst[dst != 0] = 1 - contour_text_copy = contour_text_interest.copy() - contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0] - contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] + # if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3: + # plt.imshow(dst) + # plt.show() - img_contour = np.zeros((box_ind[3], box_ind[2], 3)) - img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255)) + contour_text_copy = contour_text_interest.copy() + contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0] + contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] - img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), - img_contour.shape[1] + int(2 * x_help), 3)) - img_contour_help[y_help : y_help + img_contour.shape[0], - x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) + img_contour = np.zeros((box_ind[3], box_ind[2])) + img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=255) - img_contour_rot = rotate_image(img_contour_help, slope) + img_contour_help = 
np.zeros((img_contour.shape[0] + int(2 * y_help), + img_contour.shape[1] + int(2 * x_help))) + img_contour_help[y_help : y_help + img_contour.shape[0], + x_help : x_help + img_contour.shape[1]] = np.copy(img_contour[:, :]) - img_contour_rot = img_contour_rot.astype(np.uint8) - # dst_help = dst_help.astype(np.uint8) - imgrayrot = cv2.cvtColor(img_contour_rot, cv2.COLOR_BGR2GRAY) - _, threshrot = cv2.threshold(imgrayrot, 0, 255, 0) - contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + img_contour_rot = rotate_image(img_contour_help, slope) - len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))] - ind_big_con = np.argmax(len_con_text_rot) + _, threshrot = cv2.threshold(img_contour_rot, 0, 255, 0) + contours_text_rot, _ = cv2.findContours(threshrot.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if abs(slope) > 45: - _, contours_rotated_clean = separate_lines_vertical_cont( - textline_mask, contours_text_rot[ind_big_con], box_ind, slope, - add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) - else: - _, contours_rotated_clean = separate_lines( - dst, contours_text_rot[ind_big_con], slope, x_help, y_help) - except: - contours_rotated_clean = [] + len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))] + ind_big_con = np.argmax(len_con_text_rot) + + if abs(slope) > 45: + _, contours_rotated_clean = separate_lines_vertical_cont( + textline_mask, contours_text_rot[ind_big_con], box_ind, slope, + add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) + else: + _, contours_rotated_clean = separate_lines( + dst, contours_text_rot[ind_big_con], slope, x_help, y_help) return contours_rotated_clean -def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): +def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None): if logger is None: logger = getLogger(__package__) + if not np.prod(img_crop.shape): + return img_crop if num_col == 1: - num_patches = int(img_path.shape[1] / 200.0) + num_patches = int(img_crop.shape[1] / 200.0) else: - num_patches = int(img_path.shape[1] / 140.0) - # num_patches=int(img_path.shape[1]/200.) + num_patches = int(img_crop.shape[1] / 140.0) + # num_patches=int(img_crop.shape[1]/200.) 
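
For orientation, the tile bookkeeping that the renamed `img_crop` flows through below works on overlapping vertical strips: each tile is `length_x` wide, neighbouring tiles advance by `width_mid` (so they overlap by `2*margin`), and the last tile is clamped to the image edge. A condensed sketch with assumed values (width 1000, multi-column branch), mirroring the stride/clamp logic of this function:

```python
import math

width = 1000                          # stand-in for img_crop.shape[1]
num_patches = max(1, width // 140)    # multi-column branch
length_x = width // num_patches       # tile width
margin = int(0.04 * length_x)         # trimmed from each tile after deskewing
width_mid = length_x - 2 * margin     # horizontal stride; overlap is 2*margin
nxf = math.ceil(width / width_mid)    # number of tiles

for i in range(nxf):
    index_x_d = i * width_mid
    index_x_u = index_x_d + length_x
    if index_x_u > width:             # clamp the final tile to the right edge
        index_x_u = width
        index_x_d = width - length_x
    print(i, index_x_d, index_x_u)
```
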
if num_patches == 0: num_patches = 1 - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] + img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] - # plt.imshow(img_patch_ineterst) + # plt.imshow(img_patch_interest) # plt.show() - length_x = int(img_path.shape[1] / float(num_patches)) + length_x = int(img_crop.shape[1] / float(num_patches)) # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2 margin = int(0.04 * length_x) # if margin<=4: @@ -1370,7 +1417,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # margin=0 width_mid = length_x - 2 * margin - nxf = img_path.shape[1] / float(width_mid) + nxf = img_crop.shape[1] / float(width_mid) if nxf > int(nxf): nxf = int(nxf) + 1 @@ -1386,12 +1433,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl index_x_d = i * width_mid index_x_u = index_x_d + length_x - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x + if index_x_u > img_crop.shape[1]: + index_x_u = img_crop.shape[1] + index_x_d = img_crop.shape[1] - length_x # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] + img_xline = img_patch_interest[:, index_x_d:index_x_u] try: assert img_xline.any() @@ -1407,9 +1454,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_line_rotated = rotate_image(img_xline, slope_xline) img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] + img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] - img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) + img_patch_interest_revised = np.zeros(img_patch_interest.shape) for i in range(nxf): if i == 0: @@ -1419,18 +1466,18 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl index_x_d = i * width_mid index_x_u = index_x_d + length_x - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x + if index_x_u > img_crop.shape[1]: + index_x_u = img_crop.shape[1] + index_x_d = img_crop.shape[1] - length_x - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] + img_xline = img_patch_interest[:, index_x_d:index_x_u] img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] - img_resized = np.zeros((int(img_int.shape[0] * 1.2), int(img_int.shape[1] * 3))) - img_resized[int(img_int.shape[0] * 0.1): int(img_int.shape[0] * 0.1) + img_int.shape[0], - int(img_int.shape[1] * 1.0): int(img_int.shape[1] * 1.0) + img_int.shape[1]] = img_int[:, :] + img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3)))) + img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], + int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] = img_int[:, :] # plt.imshow(img_xline) # plt.show() img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i]) @@ -1442,15 +1489,16 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_patch_separated_returned[:, :][img_patch_separated_returned[:, :] != 0] = 1 img_patch_separated_returned_true_size = img_patch_separated_returned[ - 
int(img_int.shape[0] * 0.1): int(img_int.shape[0] * 0.1) + img_int.shape[0], - int(img_int.shape[1] * 1.0): int(img_int.shape[1] * 1.0) + img_int.shape[1]] + int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], + int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] - img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size + img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size - return img_patch_ineterst_revised + return img_patch_interest_revised -def do_image_rotation(angle, img, sigma_des, logger=None): +@wrap_ndarray_shared(kw='img') +def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None): if logger is None: logger = getLogger(__package__) img_rot = rotate_image(img, angle) @@ -1463,7 +1511,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None): return var def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, - main_page=False, logger=None, plotter=None, map=map): + main_page=False, logger=None, plotter=None, map=None): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1471,165 +1519,81 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] max_shape=np.max(img_int.shape) - img_resized=np.zeros((int(max_shape * 1.1) , int(max_shape * 1.1))) + img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) )) onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.) onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) - #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] + #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0], + # int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: angles = np.array([-45, 0, 45, 90,]) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) elif main_page: - angles = np.array (list(np.linspace(-12, -7, int(n_tot_angles/4))) + list(np.linspace(-6, 6, n_tot_angles- 2* int(n_tot_angles/4))) + list(np.linspace(7, 12, int(n_tot_angles/4))))#np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + #angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) + angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4), + np.linspace(-6, 6, n_tot_angles // 2), + np.linspace(7, 12, n_tot_angles // 4))) + angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, 
logger=logger, plotter=plotter) early_slope_edge=11 if abs(angle) > early_slope_edge: if angle < 0: - angles = np.linspace(-90, -12, n_tot_angles) + angles2 = np.linspace(-90, -12, n_tot_angles) else: - angles = np.linspace(90, 12, n_tot_angles) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + angles2 = np.linspace(90, 12, n_tot_angles) + angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter) + if var2 > var: + angle = angle2 else: angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) early_slope_edge=22 if abs(angle) > early_slope_edge: if angle < 0: - angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) + angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) else: - angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) - + angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) + angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter) + if var2 > var: + angle = angle2 return angle def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): if logger is None: logger = getLogger(__package__) - results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles)) + if map is None: + results = [do_image_rotation.__wrapped__(angle, img=img, sigma_des=sigma_des, logger=logger) + for angle in angles] + else: + with share_ndarray(img) as img_shared: + results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=None), + angles)) if plotter: plotter.save_plot_of_rotation_angle(angles, results) try: var_res = np.array(results) assert var_res.any() - angle = angles[np.argmax(var_res)] + idx = np.argmax(var_res) + angle = angles[idx] + var = var_res[idx] except: logger.exception("cannot determine best angle among %s", str(angles)) angle = 0 - return angle - - -def return_deskew_slop_old_mp(img_patch_org, sigma_des,n_tot_angles=100, - main_page=False, logger=None, plotter=None): - if main_page and plotter: - plotter.save_plot_of_textline_density(img_patch_org) - - img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) - img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] - - max_shape=np.max(img_int.shape) - img_resized=np.zeros((int(max_shape * 1.1) , int(max_shape * 1.1))) - - onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.) - onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) 
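
The `angle2, var2` bookkeeping above makes the deskew search keep the overall best result: after the first sweep lands beyond `early_slope_edge`, the shifted second sweep only replaces the first winner if its variance score is actually higher. A condensed sketch of that control flow, with a hypothetical `score` callable standing in for the projection-variance measure (`find_num_col_deskew` in the real code):

```python
import numpy as np

def best_angle(score, angles):
    variances = np.array([score(a) for a in angles])
    idx = int(np.argmax(variances))
    return angles[idx], variances[idx]

def deskew(score, n_tot_angles=100, early_slope_edge=11):
    # First sweep around 0 degrees, denser in the middle of the range
    angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4),
                             np.linspace(-6, 6, n_tot_angles // 2),
                             np.linspace(7, 12, n_tot_angles // 4)))
    angle, var = best_angle(score, angles)
    if abs(angle) > early_slope_edge:
        # Shift the search range toward the steep side, but only adopt
        # its winner if it scores better than the first sweep's result.
        sign = -1 if angle < 0 else 1
        angle2, var2 = best_angle(score,
                                  np.linspace(sign * 90, sign * 12, n_tot_angles))
        if var2 > var:
            angle = angle2
    return angle

print(deskew(lambda a: -(a - 3.0) ** 2))  # toy score peaking at 3 degrees -> ~3.0
```
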
- - img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] - - if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: - angles = np.array([-45, 0, 45, 90,]) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - - angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - elif main_page: - angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - - early_slope_edge=11 - if abs(angle) > early_slope_edge: - if angle < 0: - angles = np.linspace(-90, -12, n_tot_angles) - else: - angles = np.linspace(90, 12, n_tot_angles) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - else: - angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - - early_slope_edge=22 - if abs(angle) > early_slope_edge: - if angle < 0: - angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) - else: - angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - - return angle - -def do_image_rotation_omp(queue_of_all_params,angles_per_process, img_resized, sigma_des): - vars_per_each_subprocess = [] - angles_per_each_subprocess = [] - for mv in range(len(angles_per_process)): - img_rot=rotate_image(img_resized,angles_per_process[mv]) - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - vars_per_each_subprocess.append(var_spectrum) - angles_per_each_subprocess.append(angles_per_process[mv]) - - queue_of_all_params.put([vars_per_each_subprocess, angles_per_each_subprocess]) - -def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None): - num_cores = cpu_count() - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angles), num_cores + 1) - - for i in range(num_cores): - angles_per_process = angles[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation_omp, args=(queue_of_all_params, angles_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - all_angles = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - vars_for_subprocess = list_all_par[0] - angles_sub_process = list_all_par[1] - for j in range(len(vars_for_subprocess)): - var_res.append(vars_for_subprocess[j]) - all_angles.append(angles_sub_process[j]) - - for i in range(num_cores): - processes[i].join() - - if plotter: - plotter.save_plot_of_rotation_angle(all_angles, var_res) - - - try: - var_res=np.array(var_res) - ang_int=all_angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - return ang_int + var = 0 + return angle, var +@wrap_ndarray_shared(kw='textline_mask_tot_ea') def do_work_of_slopes_new( - box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, + box_text, contour, contour_par, + textline_mask_tot_ea=None, slope_deskew=0.0, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1639,7 +1603,7 @@ def do_work_of_slopes_new( logger.debug('enter do_work_of_slopes_new') x, y, w, h = box_text - _, crop_coor = 
crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline @@ -1647,7 +1611,7 @@ def do_work_of_slopes_new( img_int_p = all_text_region_raw[:,:] img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) - if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: + if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] @@ -1683,11 +1647,14 @@ def do_work_of_slopes_new( all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text) - return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope + return cnt_clean_rot, crop_coor, slope +@wrap_ndarray_shared(kw='textline_mask_tot_ea') +@wrap_ndarray_shared(kw='mask_texts_only') def do_work_of_slopes_new_curved( - box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, + box_text, contour_par, + textline_mask_tot_ea=None, mask_texts_only=None, + num_col=1, scale_par=1.0, slope_deskew=0.0, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1704,7 +1671,7 @@ def do_work_of_slopes_new_curved( # plt.imshow(img_int_p) # plt.show() - if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: + if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew else: @@ -1730,7 +1697,7 @@ def do_work_of_slopes_new_curved( slope_for_all = slope_deskew slope = slope_for_all - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) if abs(slope_for_all) < 45: textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) @@ -1763,20 +1730,25 @@ def do_work_of_slopes_new_curved( mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) pixel_img = 1 - mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) + mask_biggest2 = resize_image(mask_biggest2, + int(mask_biggest2.shape[0] * scale_par), + int(mask_biggest2.shape[1] * scale_par)) cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) try: textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) except Exception as why: logger.error(why) else: - textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) + textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, + slope_for_all, contour_par, + box_text, True) - return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope + return textlines_cnt_per_region[::-1], crop_coor, slope +@wrap_ndarray_shared(kw='textline_mask_tot_ea') def do_work_of_slopes_new_light( - box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, + box_text, contour, contour_par, + textline_mask_tot_ea=None, slope_deskew=0, textline_light=True, logger=None ): if logger is None: @@ -1784,7 +1756,7 @@ def do_work_of_slopes_new_light( logger.debug('enter do_work_of_slopes_new_light') x, y, w, h = box_text - _, crop_coor = 
crop_image_inside_box(box_text, image_page_rotated)
+    crop_coor = box2rect(box_text)
     mask_textline = np.zeros(textline_mask_tot_ea.shape)
     mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
     all_text_region_raw = textline_mask_tot_ea * mask_textline
@@ -1805,4 +1777,4 @@ def do_work_of_slopes_new_light(
     all_text_region_raw[mask_only_con_region == 0] = 0
     cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text)

-    return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope_deskew
+    return cnt_clean_rot, crop_coor, slope_deskew
diff --git a/src/eynollah/utils/shm.py b/src/eynollah/utils/shm.py
new file mode 100644
index 0000000..4b51053
--- /dev/null
+++ b/src/eynollah/utils/shm.py
@@ -0,0 +1,51 @@
+from multiprocessing import shared_memory
+from contextlib import contextmanager
+from functools import wraps
+import numpy as np
+
+@contextmanager
+def share_ndarray(array: np.ndarray):
+    # copy `array` into a newly created shared-memory block and yield a small
+    # descriptor dict (shape/dtype/name) that is cheap to pickle to workers;
+    # the block is closed and unlinked again when the context exits
+    size = np.dtype(array.dtype).itemsize * np.prod(array.shape)
+    shm = shared_memory.SharedMemory(create=True, size=size)
+    try:
+        shared_array = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
+        shared_array[:] = array[:]
+        shared_array.flags["WRITEABLE"] = False
+        yield dict(shape=array.shape, dtype=array.dtype, name=shm.name)
+    finally:
+        shm.close()
+        shm.unlink()
+
+@contextmanager
+def ndarray_shared(array: dict):
+    # attach to an existing shared-memory block by name and yield it as ndarray
+    shm = shared_memory.SharedMemory(name=array['name'])
+    try:
+        array = np.ndarray(array['shape'], dtype=array['dtype'], buffer=shm.buf)
+        yield array
+    finally:
+        shm.close()
+
+def wrap_ndarray_shared(kw=None):
+    # decorator factory: make `f` accept a shared-memory descriptor (as first
+    # positional argument, or as keyword `kw`) in place of the ndarray itself
+    def wrapper(f):
+        if kw is None:
+            @wraps(f)
+            def shared_func(array, *args, **kwargs):
+                with ndarray_shared(array) as ndarray:
+                    return f(ndarray, *args, **kwargs)
+            return shared_func
+        else:
+            @wraps(f)
+            def shared_func(*args, **kwargs):
+                array = kwargs.pop(kw)
+                with ndarray_shared(array) as ndarray:
+                    kwargs[kw] = ndarray
+                    return f(*args, **kwargs)
+            return shared_func
+    return wrapper
+
diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py
index 4fa99f7..6e71b0f 100644
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@@ -1,13 +1,17 @@
+import math
+import copy
+
 import numpy as np
 import cv2
 import tensorflow as tf
 from scipy.signal import find_peaks
 from scipy.ndimage import gaussian_filter1d
-import math
 from PIL import Image, ImageDraw, ImageFont
 from Bio import pairwise2
+
 from .resize import resize_image
+
 def decode_batch_predictions(pred, num_to_char, max_len = 128):
     # input_len is the product of the batch size and the
     # number of time steps.
@@ -92,6 +96,7 @@ def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(t return peaks_final else: return None + # Function to fit text inside the given area def fit_text_single_line(draw, text, font_path, max_width, max_height): initial_font_size = 50 @@ -369,7 +374,13 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind return textline_contour -def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False): +def return_rnn_cnn_ocr_of_given_textlines(image, + all_found_textline_polygons, + all_box_coord, + prediction_model, + b_s_ocr, num_to_char, + textline_light=False, + curved_line=False): max_len = 512 padding_token = 299 image_width = 512#max_len * 4 @@ -425,17 +436,23 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) if splited_images: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) else: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) @@ -468,7 +485,12 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr pred_texts_ib = pred_texts[ib].replace("[UNK]", "") extracted_texts.append(pred_texts_ib) - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] + if cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) diff --git a/src/eynollah/utils/xml.py b/src/eynollah/utils/xml.py index 13420df..88d1df8 100644 --- a/src/eynollah/utils/xml.py +++ b/src/eynollah/utils/xml.py @@ -57,19 +57,15 @@ def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_margina og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) region_counter.inc('region') - for idx_textregion, _ in enumerate(order_of_texts): - og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(order_of_texts[idx_textregion] + 1))) + for idx_textregion in order_of_texts: + og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(idx_textregion + 1))) region_counter.inc('region') for 
id_marginal in id_of_marginalia_right: og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) region_counter.inc('region') -def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): - indexes_sorted = np.array(indexes_sorted) - index_of_types = np.array(index_of_types) - kind_of_texts = np.array(kind_of_texts) - +def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, indexes_sorted, index_of_types, kind_of_texts, ref_point): id_of_texts = [] order_of_texts = [] diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 4d97153..9c3456a 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -56,113 +56,30 @@ class EynollahXmlWriter: points_page_print = points_page_print + ' ' return points_page_print[:-1] - def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_all_textlines_textregion): - for j in range(len(all_found_textline_polygons_marginals[marginal_idx])): - coords = CoordsType() - textline = TextLineType(id=counter.next_line_id, Coords=coords) - if ocr_all_textlines_textregion: - textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) - marginal_region.add_TextLine(textline) - marginal_region.set_orientation(-slopes_marginals[marginal_idx]) - points_co = '' - for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])): - if not (self.curved_line or self.textline_light): - if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) - textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) - else: - textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) - textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) - points_co += str(textline_x_coord) - points_co += ',' - points_co += str(textline_y_coord) - if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45: - if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y)) - else: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y)) - - elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45: - if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + 
page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) - else: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) - points_co += ' ' - coords.set_points(points_co[:-1]) - def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): self.logger.debug('enter serialize_lines_in_region') - for j in range(len(all_found_textline_polygons[region_idx])): + for j, polygon_textline in enumerate(all_found_textline_polygons[region_idx]): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) if ocr_all_textlines_textregion: - textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) + # FIXME: add OCR confidence + textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])]) text_region.add_TextLine(textline) text_region.set_orientation(-slopes[region_idx]) region_bboxes = all_box_coord[region_idx] points_co = '' - for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): - if not (self.curved_line or self.textline_light): - if len(contour_textline) == 2: - textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) - textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) - else: - textline_x_coord = max(0, int((contour_textline[0][0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) - textline_y_coord = max(0, int((contour_textline[0][1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) - points_co += str(textline_x_coord) - points_co += ',' - points_co += str(textline_y_coord) - - if self.textline_light or (self.curved_line and np.abs(slopes[region_idx]) <= 45): - if len(contour_textline) == 2: - points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y)) - else: - points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) - elif self.curved_line and np.abs(slopes[region_idx]) > 45: - if len(contour_textline)==2: - points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[1] + region_bboxes[0] + page_coord[0])/self.scale_y)) - else: - points_co += str(int((contour_textline[0][0] + region_bboxes[2]+page_coord[2])/self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y)) - points_co += ' ' - coords.set_points(points_co[:-1]) - - def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): - self.logger.debug('enter serialize_lines_in_region') - for j in range(1): - coords = CoordsType() - textline = TextLineType(id=counter.next_line_id, 
Coords=coords) - if ocr_all_textlines_textregion: - textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) - text_region.add_TextLine(textline) - #region_bboxes = all_box_coord[region_idx] - points_co = '' - for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[j]): - if len(contour_textline) == 2: - points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y)) - else: - points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) - - points_co += ' ' + for point in polygon_textline: + if len(point) != 2: + point = point[0] + point_x = point[0] + page_coord[2] + point_y = point[1] + page_coord[0] + # FIXME: or actually... not self.textline_light and not self.curved_line or np.abs(slopes[region_idx]) > 45? + if not self.textline_light and not (self.curved_line and np.abs(slopes[region_idx]) <= 45): + point_x += region_bboxes[2] + point_y += region_bboxes[0] + point_x = max(0, int(point_x / self.scale_x)) + point_y = max(0, int(point_y / self.scale_y)) + points_co += str(point_x) + ',' + str(point_y) + ' ' coords.set_points(points_co[:-1]) def write_pagexml(self, pcgts): @@ -170,8 +87,50 @@ class EynollahXmlWriter: with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, conf_contours_textregion=None, skip_layout_reading_order=False): - self.logger.debug('enter build_pagexml_no_full_layout') + def build_pagexml_no_full_layout( + self, found_polygons_text_region, + page_coord, order_of_texts, id_of_texts, + all_found_textline_polygons, + all_box_coord, + found_polygons_text_region_img, + found_polygons_marginals_left, found_polygons_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, + found_polygons_tables, + **kwargs): + return self.build_pagexml_full_layout( + found_polygons_text_region, [], + page_coord, order_of_texts, id_of_texts, + all_found_textline_polygons, [], + all_box_coord, [], + found_polygons_text_region_img, found_polygons_tables, [], + found_polygons_marginals_left, found_polygons_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, [], slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, + **kwargs) + + def build_pagexml_full_layout( + self, + found_polygons_text_region, found_polygons_text_region_h, + page_coord, order_of_texts, id_of_texts, + all_found_textline_polygons, all_found_textline_polygons_h, + 
all_box_coord, all_box_coord_h, + found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, + found_polygons_marginals_left,found_polygons_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, + ocr_all_textlines=None, ocr_all_textlines_h=None, + ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, + ocr_all_textlines_drop=None, + conf_contours_textregions=None, conf_contours_textregions_h=None, + skip_layout_reading_order=False): + self.logger.debug('enter build_pagexml') # create the file structure pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org) @@ -179,191 +138,116 @@ class EynollahXmlWriter: page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page)))) counter = EynollahIdCounter() - if len(found_polygons_text_region) > 0: + if len(order_of_texts): _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left] - id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] + id_of_marginalia_left = [_counter_marginals.next_region_id + for _ in found_polygons_marginals_left] + id_of_marginalia_right = [_counter_marginals.next_region_id + for _ in found_polygons_marginals_right] xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) - for mm in range(len(found_polygons_text_region)): - textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]), - ) - #textregion.set_conf(conf_contours_textregion[mm]) + for mm, region_contour in enumerate(found_polygons_text_region): + textregion = TextRegionType( + id=counter.next_region_id, type_='paragraph', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord, + skip_layout_reading_order)) + ) + if conf_contours_textregions: + textregion.Coords.set_conf(conf_contours_textregions[mm]) page.add_TextRegion(textregion) if ocr_all_textlines: ocr_textlines = ocr_all_textlines[mm] else: ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) - - for mm in range(len(found_polygons_marginals_left)): - marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) - page.add_TextRegion(marginal) - if ocr_all_textlines_marginals_left: - ocr_textlines = ocr_all_textlines_marginals_left[mm] - else: - ocr_textlines = None - - #print(ocr_textlines, mm, len(all_found_textline_polygons_marginals_left[mm]) ) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) - - for mm in range(len(found_polygons_marginals_right)): - marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], 
page_coord))) - page.add_TextRegion(marginal) - if ocr_all_textlines_marginals_right: - ocr_textlines = ocr_all_textlines_marginals_right[mm] - else: - ocr_textlines = None - - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) - - for mm in range(len(found_polygons_text_region_img)): - img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType()) - page.add_ImageRegion(img_region) - points_co = '' - for lmm in range(len(found_polygons_text_region_img[mm])): - try: - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) - points_co += ' ' - except: - - points_co += str(int((found_polygons_text_region_img[mm][lmm][0] + page_coord[2])/ self.scale_x )) - points_co += ',' - points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0])/ self.scale_y )) - points_co += ' ' - - img_region.get_Coords().set_points(points_co[:-1]) - - for mm in range(len(polygons_lines_to_be_written_in_xml)): - sep_hor = SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType()) - page.add_SeparatorRegion(sep_hor) - points_co = '' - for lmm in range(len(polygons_lines_to_be_written_in_xml[mm])): - points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,0] ) / self.scale_x)) - points_co += ',' - points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,1] ) / self.scale_y)) - points_co += ' ' - sep_hor.get_Coords().set_points(points_co[:-1]) - for mm in range(len(found_polygons_tables)): - tab_region = TableRegionType(id=counter.next_region_id, Coords=CoordsType()) - page.add_TableRegion(tab_region) - points_co = '' - for lmm in range(len(found_polygons_tables[mm])): - points_co += str(int((found_polygons_tables[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((found_polygons_tables[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) - points_co += ' ' - tab_region.get_Coords().set_points(points_co[:-1]) - - return pcgts - - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals_left,found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None): - self.logger.debug('enter build_pagexml_full_layout') - - # create the file structure - pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org) - page = pcgts.get_Page() - page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page)))) - - counter = EynollahIdCounter() - _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia_left = 
[_counter_marginals.next_region_id for _ in found_polygons_marginals_left] - id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] - xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) - - for mm in range(len(found_polygons_text_region)): - textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm])) - page.add_TextRegion(textregion) - - if ocr_all_textlines: - ocr_textlines = ocr_all_textlines[mm] - else: - ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, + all_box_coord, slopes, counter, ocr_textlines) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) - for mm in range(len(found_polygons_text_region_h)): - textregion = TextRegionType(id=counter.next_region_id, type_='header', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) + for mm, region_contour in enumerate(found_polygons_text_region_h): + textregion = TextRegionType( + id=counter.next_region_id, type_='heading', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) + ) + if conf_contours_textregions_h: + textregion.Coords.set_conf(conf_contours_textregions_h[mm]) page.add_TextRegion(textregion) - if ocr_all_textlines_h: ocr_textlines = ocr_all_textlines_h[mm] else: ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) + self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, + all_box_coord_h, slopes_h, counter, ocr_textlines) - for mm in range(len(found_polygons_marginals_left)): - marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) + for mm, region_contour in enumerate(found_polygons_marginals_left): + marginal = TextRegionType( + id=counter.next_region_id, type_='marginalia', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) + ) page.add_TextRegion(marginal) if ocr_all_textlines_marginals_left: ocr_textlines = ocr_all_textlines_marginals_left[mm] else: ocr_textlines = None - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) - - for mm in range(len(found_polygons_marginals_right)): - marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord))) + self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) + + for mm, region_contour in enumerate(found_polygons_marginals_right): + marginal = TextRegionType( + id=counter.next_region_id, type_='marginalia', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) + ) page.add_TextRegion(marginal) if ocr_all_textlines_marginals_right: 
ocr_textlines = ocr_all_textlines_marginals_right[mm] else: ocr_textlines = None - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) - - for mm in range(len(found_polygons_drop_capitals)): - dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))) + self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, + all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) + + for mm, region_contour in enumerate(found_polygons_drop_capitals): + dropcapital = TextRegionType( + id=counter.next_region_id, type_='drop-capital', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) + ) page.add_TextRegion(dropcapital) - all_box_coord_drop = None - slopes_drop = None + all_box_coord_drop = [[0, 0, 0, 0]] + slopes_drop = [0] if ocr_all_textlines_drop: ocr_textlines = ocr_all_textlines_drop[mm] else: ocr_textlines = None - self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=ocr_textlines) + self.serialize_lines_in_region(dropcapital, [[found_polygons_drop_capitals[mm]]], 0, page_coord, + all_box_coord_drop, slopes_drop, counter, ocr_textlines) - for mm in range(len(found_polygons_text_region_img)): - page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) + for region_contour in found_polygons_text_region_img: + page.add_ImageRegion( + ImageRegionType(id=counter.next_region_id, + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)))) - for mm in range(len(polygons_lines_to_be_written_in_xml)): - page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) + for region_contour in polygons_seplines: + page.add_SeparatorRegion( + SeparatorRegionType(id=counter.next_region_id, + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, [0, 0, 0, 0])))) - for mm in range(len(found_polygons_tables)): - page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) + for region_contour in found_polygons_tables: + page.add_TableRegion( + TableRegionType(id=counter.next_region_id, + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)))) return pcgts def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False): self.logger.debug('enter calculate_polygon_coords') coords = '' - for value_bbox in contour: - if skip_layout_reading_order: - if len(value_bbox) == 2: - coords += str(int((value_bbox[0]) / self.scale_x)) - coords += ',' - coords += str(int((value_bbox[1]) / self.scale_y)) - else: - coords += str(int((value_bbox[0][0]) / self.scale_x)) - coords += ',' - coords += str(int((value_bbox[0][1]) / self.scale_y)) - else: - if len(value_bbox) == 2: - coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x)) - coords += ',' - coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y)) - else: - coords += 
str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x)) - coords += ',' - coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y)) - coords=coords + ' ' + for point in contour: + if len(point) != 2: + point = point[0] + point_x = point[0] + point_y = point[1] + if not skip_layout_reading_order: + point_x += page_coord[2] + point_y += page_coord[0] + point_x = int(point_x / self.scale_x) + point_y = int(point_y / self.scale_y) + coords += str(point_x) + ',' + str(point_y) + ' ' return coords[:-1] diff --git a/tests/test_run.py b/tests/test_run.py index be928a0..79c64c2 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -17,18 +17,35 @@ from ocrd_models.constants import NAMESPACES as NS testdir = Path(__file__).parent.resolve() MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve())) -MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve())) +MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_1').resolve())) MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve())) -def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog): +@pytest.mark.parametrize( + "options", + [ + [], # defaults + #["--allow_scaling", "--curved-line"], + ["--allow_scaling", "--curved-line", "--full-layout"], + ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"], + ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based", + "--textline_light", "--light_version"], + # -ep ... + # -eoi ... + # FIXME: find out whether OCR extra was installed, otherwise skip these + ["--do_ocr"], + ["--do_ocr", "--light_version", "--textline_light"], + ["--do_ocr", "--transformer_ocr"], + #["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light"], + ["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light", "--full-layout"], + # --skip_layout_and_reading_order + ], ids=str) +def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' args = [ '-m', MODELS_LAYOUT, '-i', str(infile), '-o', str(outfile.parent), - # subtests write to same location - '--overwrite', ] if pytestconfig.getoption('verbose') > 0: args.extend(['-l', 'DEBUG']) @@ -36,33 +53,57 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog): def only_eynollah(logrec): return logrec.name == 'eynollah' runner = CliRunner() - for options in [ - [], # defaults - ["--allow_scaling", "--curved-line"], - ["--allow_scaling", "--curved-line", "--full-layout"], - ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"], - ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based", - "--textline_light", "--light_version"], - # -ep ... - # -eoi ... 
- # --do_ocr - # --skip_layout_and_reading_order - ]: - with subtests.test(#msg="test CLI", - options=options): - with caplog.filtering(only_eynollah): - result = runner.invoke(layout_cli, args + options, catch_exceptions=False) - assert result.exit_code == 0, result.stdout - logmsgs = [logrec.message for logrec in caplog.records] - assert str(infile) in logmsgs - assert outfile.exists() - tree = page_from_file(str(outfile)).etree - regions = tree.xpath("//page:TextRegion", namespaces=NS) - assert len(regions) >= 2, "result is inaccurate" - regions = tree.xpath("//page:SeparatorRegion", namespaces=NS) - assert len(regions) >= 2, "result is inaccurate" - lines = tree.xpath("//page:TextLine", namespaces=NS) - assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line + with caplog.filtering(only_eynollah): + result = runner.invoke(layout_cli, args + options, catch_exceptions=False) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + assert str(infile) in logmsgs + assert outfile.exists() + tree = page_from_file(str(outfile)).etree + regions = tree.xpath("//page:TextRegion", namespaces=NS) + assert len(regions) >= 2, "result is inaccurate" + regions = tree.xpath("//page:SeparatorRegion", namespaces=NS) + assert len(regions) >= 2, "result is inaccurate" + lines = tree.xpath("//page:TextLine", namespaces=NS) + assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line + +@pytest.mark.parametrize( + "options", + [ + ["--tables"], + ["--tables", "--full-layout"], + ["--tables", "--full-layout", "--textline_light", "--light_version"], + ], ids=str) +def test_run_eynollah_layout_filename2(tmp_path, pytestconfig, caplog, options): + infile = testdir.joinpath('resources/euler_rechenkunst01_1738_0025.tif') + outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml' + args = [ + '-m', MODELS_LAYOUT, + '-i', str(infile), + '-o', str(outfile.parent), + ] + if pytestconfig.getoption('verbose') > 0: + args.extend(['-l', 'DEBUG']) + caplog.set_level(logging.INFO) + def only_eynollah(logrec): + return logrec.name == 'eynollah' + runner = CliRunner() + with caplog.filtering(only_eynollah): + result = runner.invoke(layout_cli, args + options, catch_exceptions=False) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + assert str(infile) in logmsgs + assert outfile.exists() + tree = page_from_file(str(outfile)).etree + regions = tree.xpath("//page:TextRegion", namespaces=NS) + assert len(regions) >= 2, "result is inaccurate" + regions = tree.xpath("//page:TableRegion", namespaces=NS) + # model/decoding is not very precise, so (depending on mode) we can get fractures/splits/FP + assert len(regions) >= 1, "result is inaccurate" + regions = tree.xpath("//page:SeparatorRegion", namespaces=NS) + assert len(regions) >= 2, "result is inaccurate" + lines = tree.xpath("//page:TextLine", namespaces=NS) + assert len(lines) >= 2, "result is inaccurate" # mostly table (if detected correctly), but 1 page and 1 catch-word line def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') @@ -86,7 +127,13 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in')) assert len(list(outdir.iterdir())) == 2 -def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog): 
+@pytest.mark.parametrize( + "options", + [ + [], # defaults + ["--no-patches"], + ], ids=str) +def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') args = [ @@ -100,25 +147,19 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca def only_eynollah(logrec): return logrec.name == 'SbbBinarizer' runner = CliRunner() - for options in [ - [], # defaults - ["--no-patches"], - ]: - with subtests.test(#msg="test CLI", - options=options): - with caplog.filtering(only_eynollah): - result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) - assert result.exit_code == 0, result.stdout - logmsgs = [logrec.message for logrec in caplog.records] - assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) - assert outfile.exists() - with Image.open(infile) as original_img: - original_size = original_img.size - with Image.open(outfile) as binarized_img: - binarized_size = binarized_img.size - assert original_size == binarized_size + with caplog.filtering(only_eynollah): + result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) + assert outfile.exists() + with Image.open(infile) as original_img: + original_size = original_img.size + with Image.open(outfile) as binarized_img: + binarized_size = binarized_img.size + assert original_size == binarized_size -def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog): +def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ @@ -139,15 +180,19 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 assert len(list(outdir.iterdir())) == 2 -def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog): +@pytest.mark.parametrize( + "options", + [ + [], # defaults + ["-sos"], + ], ids=str) +def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') args = [ '-m', MODELS_LAYOUT, '-i', str(infile), '-o', str(outfile.parent), - # subtests write to same location - '--overwrite', ] if pytestconfig.getoption('verbose') > 0: args.extend(['-l', 'DEBUG']) @@ -155,25 +200,19 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap def only_eynollah(logrec): return logrec.name == 'enhancement' runner = CliRunner() - for options in [ - [], # defaults - ["-sos"], - ]: - with subtests.test(#msg="test CLI", - options=options): - with caplog.filtering(only_eynollah): - result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False) - assert result.exit_code == 0, result.stdout - logmsgs = [logrec.message for logrec in caplog.records] - assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs - assert outfile.exists() - with Image.open(infile) as original_img: - original_size = original_img.size - with Image.open(outfile) as enhanced_img: - enhanced_size = enhanced_img.size - 
assert (original_size == enhanced_size) == ("-sos" in options)
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
+    assert outfile.exists()
+    with Image.open(infile) as original_img:
+        original_size = original_img.size
+    with Image.open(outfile) as enhanced_img:
+        enhanced_size = enhanced_img.size
+    assert (original_size == enhanced_size) == ("-sos" in options)

-def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
@@ -194,7 +233,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, ca
     assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2
     assert len(list(outdir.iterdir())) == 2

-def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
     args = [
@@ -223,7 +262,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplo
     #assert in_order != out_order
     assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']

-def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
@@ -245,7 +284,14 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl
     #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
     assert len(list(outdir.iterdir())) == 2

-def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
+@pytest.mark.parametrize(
+    "options",
+    [
+        [], # defaults
+        ["-doit"], # str(outrenderfile.parent) is inserted after "-doit" at runtime below
+        ["-trocr"],
+    ], ids=str)
+def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
     outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
@@ -255,8 +302,6 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
         '-i', str(infile),
         '-dx', str(infile.parent),
         '-o', str(outfile.parent),
-        # subtests write to same location
-        '--overwrite',
     ]
     if pytestconfig.getoption('verbose') > 0:
         args.extend(['-l', 'DEBUG'])
@@ -264,33 +309,25 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
     def only_eynollah(logrec):
         return logrec.name == 'eynollah'
     runner = CliRunner()
-    for options in [
-            # kba Fri Sep 26 12:53:49 CEST 2025
-            # Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
-            # [], # defaults
-            # ["-doit", str(outrenderfile.parent)],
-            ["-trocr"],
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in 
caplog.records] - # FIXME: ocr has no logging! - #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs - assert outfile.exists() - if "-doit" in options: - assert outrenderfile.exists() - #in_tree = page_from_file(str(infile)).etree - #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS) - out_tree = page_from_file(str(outfile)).etree - out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS) - assert len(out_texts) >= 2, ("result is inaccurate", out_texts) - assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts) + if "-doit" in options: + options.insert(options.index("-doit") + 1, str(outrenderfile.parent)) + with caplog.filtering(only_eynollah): + result = runner.invoke(ocr_cli, args + options, catch_exceptions=False) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + # FIXME: ocr has no logging! + #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs + assert outfile.exists() + if "-doit" in options: + assert outrenderfile.exists() + #in_tree = page_from_file(str(infile)).etree + #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS) + out_tree = page_from_file(str(outfile)).etree + out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS) + assert len(out_texts) >= 2, ("result is inaccurate", out_texts) + assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts) -@pytest.mark.skip("Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged") -def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog): +def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 252213f..e2b323a 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -2,6 +2,5 @@ def test_utils_import(): import eynollah.utils import eynollah.utils.contour import eynollah.utils.drop_capitals - import eynollah.utils.drop_capitals import eynollah.utils.is_nan import eynollah.utils.rotate
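A minimal usage sketch for the new shared-memory helpers in src/eynollah/utils/shm.py (the `column_sum` function and the random data are hypothetical; `get_smallest_skew` and the `do_work_of_slopes_new*` workers above apply the same pattern):

from concurrent.futures import ProcessPoolExecutor
from functools import partial

import numpy as np

from eynollah.utils.shm import share_ndarray, wrap_ndarray_shared

@wrap_ndarray_shared(kw='img')
def column_sum(col, img=None):
    # runs in a worker process: `img` arrives as a descriptor dict and is
    # re-attached zero-copy from shared memory by the decorator
    return img[:, col].sum()

if __name__ == '__main__':
    data = np.random.rand(2000, 8)
    # copy the array into shared memory once; each task is sent only the
    # small (shape, dtype, name) descriptor instead of a pickled array copy
    with share_ndarray(data) as data_shared:
        with ProcessPoolExecutor(max_workers=4) as pool:
            sums = list(pool.map(partial(column_sum, img=data_shared),
                                 range(data.shape[1])))
    assert np.allclose(sums, data.sum(axis=0))

Note that the descriptor must be passed through the keyword named in `wrap_ndarray_shared(kw=...)`, and that the shared block only stays alive within the `share_ndarray` context, so all results have to be consumed before it exits.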