diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 042e508..9d5b2c8 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -24,24 +24,39 @@ jobs: sudo rm -rf "$AGENT_TOOLSDIRECTORY" df -h - uses: actions/checkout@v4 - - uses: actions/cache@v4 + - uses: actions/cache/restore@v4 id: seg_model_cache with: path: models_layout_v0_5_0 - key: ${{ runner.os }}-models - - uses: actions/cache@v4 + key: seg-models + - uses: actions/cache/restore@v4 id: ocr_model_cache with: - path: models_ocr_v0_5_0 - key: ${{ runner.os }}-models - - uses: actions/cache@v4 + path: models_ocr_v0_5_1 + key: ocr-models + - uses: actions/cache/restore@v4 id: bin_model_cache with: path: default-2021-03-09 - key: ${{ runner.os }}-modelbin + key: bin-models - name: Download models if: steps.seg_model_cache.outputs.cache-hit != 'true' || steps.bin_model_cache.outputs.cache-hit != 'true' || steps.ocr_model_cache.outputs.cache-hit != 'true' run: make models + - uses: actions/cache/save@v4 + if: steps.seg_model_cache.outputs.cache-hit != 'true' + with: + path: models_layout_v0_5_0 + key: seg-models + - uses: actions/cache/save@v4 + if: steps.ocr_model_cache.outputs.cache-hit != 'true' + with: + path: models_ocr_v0_5_1 + key: ocr-models + - uses: actions/cache/save@v4 + if: steps.bin_model_cache.outputs.cache-hit != 'true' + with: + path: default-2021-03-09 + key: bin-models - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: diff --git a/.gitignore b/.gitignore index 0d5d834..3cc0eac 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,11 @@ __pycache__ sbb_newspapers_org_image/pylint.log models_eynollah* +models_ocr* +models_layout* +default-2021-03-09 output.html /build /dist *.tif +TAGS diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ad9a09..f6776d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,33 @@ Versioned according to [Semantic Versioning](http://semver.org/). 
## Unreleased +Fixed: + + * :fire: polygons: avoid invalid paths (use `Polygon.buffer()` instead of dilation etc.) + * `return_boxes_of_images_by_order_of_reading_new`: avoid Numpy.dtype mismatch, simplify + * `return_boxes_of_images_by_order_of_reading_new`: log any exceptions instead of ignoring + * `filter_contours_without_textline_inside`: avoid removing from duplicate lists twice + * `get_marginals`: exit early if no peaks found to avoid spurious overlap mask + * `get_smallest_skew`: after shifting search range of rotation angle, use overall best result + * Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR) + * OCR: re-instate missing methods and fix `utils_ocr` function calls + * :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`) +f458e3e + * tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate` + (so CUDA memory gets freed between tests if running on GPU) + +Changed: + + * polygons: slightly widen for regions and lines, increase for separators + * various refactorings, some code style and identifier improvements + * deskewing/multiprocessing: switch back to ProcessPoolExecutor (faster), + but use shared memory if necessary, and switch back from `loky` to stdlib, + and shutdown in `__del__()` instead of `atexit` + * :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too + * :fire: writer: use `@type='heading'` instead of `'header'` for headings + * CI: update+improve model caching + + ## [0.5.0] - 2025-09-26 Fixed: diff --git a/Dockerfile b/Dockerfile index 4ba498b..a15776e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,6 +40,8 @@ RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json # install everything and reduce image size RUN make install EXTRAS=OCR && rm -rf /build/eynollah +# 
fixup for broken cuDNN installation (Torch pulls in 8.5.0, which is incompatible with Tensorflow) +RUN pip install nvidia-cudnn-cu11==8.6.0.163 # smoke test RUN eynollah --help diff --git a/Makefile b/Makefile index a920615..357aa47 100644 --- a/Makefile +++ b/Makefile @@ -13,12 +13,18 @@ DOCKER ?= docker #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1 +SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL))) +SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%) BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip +BIN_MODELFILE = $(notdir $(BIN_MODEL)) +BIN_MODELNAME := default-2021-03-09 -OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1 +OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1 +OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL))) +OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%) -PYTEST_ARGS ?= -vv +PYTEST_ARGS ?= -vv --isolate # BEGIN-EVAL makefile-parser --make-help Makefile @@ -31,7 +37,8 @@ help: @echo " install Install package with pip" @echo " install-dev Install editable with pip" @echo " deps-test Install test dependencies with pip" - @echo " models Download and extract models to $(CURDIR)/models_layout_v0_5_0" + @echo " models Download and extract models to $(CURDIR):" + @echo " $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)" @echo " smoke-test Run simple CLI check" @echo " ocrd-test Run OCR-D CLI check" @echo " test Run unit tests" @@ -42,33 +49,29 @@ help: @echo " PYTEST_ARGS pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests. 
[$(PYTEST_ARGS)]" @echo " SEG_MODEL URL of 'models' archive to download for segmentation 'test' [$(SEG_MODEL)]" @echo " BIN_MODEL URL of 'models' archive to download for binarization 'test' [$(BIN_MODEL)]" + @echo " OCR_MODEL URL of 'models' archive to download for OCR 'test' [$(OCR_MODEL)]" @echo "" # END-EVAL # Download and extract models to $(PWD)/models_layout_v0_5_0 -models: models_layout_v0_5_0 models_ocr_v0_5_0 default-2021-03-09 +models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME) -models_layout_v0_5_0: models_layout_v0_5_0.tar.gz - tar zxf models_layout_v0_5_0.tar.gz - -models_layout_v0_5_0.tar.gz: +$(BIN_MODELFILE): + wget -O $@ $(BIN_MODEL) +$(SEG_MODELFILE): wget -O $@ $(SEG_MODEL) - -models_ocr_v0_5_0: models_ocr_v0_5_0.tar.gz - tar zxf models_ocr_v0_5_0.tar.gz - -models_ocr_v0_5_0.tar.gz: +$(OCR_MODELFILE): wget -O $@ $(OCR_MODEL) -default-2021-03-09: $(notdir $(BIN_MODEL)) - unzip $(notdir $(BIN_MODEL)) +$(BIN_MODELNAME): $(BIN_MODELFILE) mkdir $@ - mv $(basename $(notdir $(BIN_MODEL))) $@ - -$(notdir $(BIN_MODEL)): - wget $(BIN_MODEL) + unzip -d $@ $< +$(SEG_MODELNAME): $(SEG_MODELFILE) + tar zxf $< +$(OCR_MODELNAME): $(OCR_MODELFILE) + tar zxf $< build: $(PIP) install build @@ -82,7 +85,10 @@ install: install-dev: $(PIP) install -e .$(and $(EXTRAS),[$(EXTRAS)]) -deps-test: models_layout_v0_5_0 +ifeq (OCR,$(findstring OCR, $(EXTRAS))) +deps-test: $(OCR_MODELNAME) +endif +deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME) $(PIP) install -r requirements-test.txt smoke-test: TMPDIR != mktemp -d @@ -123,9 +129,9 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif $(RM) -r $(TMPDIR) # Run unit tests -test: export MODELS_LAYOUT=$(CURDIR)/models_layout_v0_5_0 -test: export MODELS_OCR=$(CURDIR)/models_ocr_v0_5_0 -test: export MODELS_BIN=$(CURDIR)/default-2021-03-09 +test: export MODELS_LAYOUT=$(CURDIR)/$(SEG_MODELNAME) +test: export MODELS_OCR=$(CURDIR)/$(OCR_MODELNAME) +test: export MODELS_BIN=$(CURDIR)/$(BIN_MODELNAME) test: $(PYTHON) 
-m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS) diff --git a/requirements-test.txt b/requirements-test.txt index cce9428..3ebcf71 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,4 +1,4 @@ pytest -pytest-subtests +pytest-isolate coverage[toml] black diff --git a/requirements.txt b/requirements.txt index 4bc0c6a..db1d7df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,4 @@ scikit-learn >= 0.23.2 tensorflow < 2.13 numba <= 0.58.1 scikit-image -loky biopython diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 5e8412e..4b2e34f 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -28,7 +28,8 @@ from multiprocessing import cpu_count import gc import copy import json -from loky import ProcessPoolExecutor + +from concurrent.futures import ProcessPoolExecutor import xml.etree.ElementTree as ET import cv2 import numpy as np @@ -79,6 +80,8 @@ from .utils.contour import ( return_contours_of_interested_region_by_min_size, return_contours_of_interested_textline, return_parent_contours, + dilate_textregion_contours, + dilate_textline_contours, ) from .utils.rotate import ( rotate_image, @@ -102,7 +105,6 @@ from .utils.separate_lines import ( textline_contours_postprocessing, separate_lines_new2, return_deskew_slop, - return_deskew_slop_old_mp, do_work_of_slopes_new, do_work_of_slopes_new_curved, do_work_of_slopes_new_light, @@ -113,10 +115,13 @@ from .utils.drop_capitals import ( ) from .utils.marginals import get_marginals from .utils.resize import resize_image +from .utils.shm import share_ndarray from .utils import ( is_image_filename, boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, + box2rect, + box2slice, find_num_col, otsu_copy_binary, put_drop_out_from_only_drop_model, @@ -252,6 +257,9 @@ class Eynollah: self.num_col_lower = int(num_col_lower) else: self.num_col_lower = num_col_lower + + # for parallelization of CPU-intensive tasks: + 
self.executor = ProcessPoolExecutor(max_workers=cpu_count()) if threshold_art_class_layout: self.threshold_art_class_layout = float(threshold_art_class_layout) @@ -263,7 +271,6 @@ class Eynollah: else: self.threshold_art_class_textline = 0.1 - self.dir_models = dir_models self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" @@ -280,8 +287,17 @@ class Eynollah: self.model_page_dir = dir_models + "/model_eynollah_page_extraction_20250915" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" - self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_mb_ro_aug_ens_11"#"/model_step_3200000_mb_ro"#"/model_ens_reading_order_machine_based"#"/model_mb_ro_aug_ens_8"#"/model_ens_reading_order_machine_based" + self.model_region_dir_p_ens_light_only_images_extraction = (dir_models + + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" + ) + self.model_reading_order_dir = (dir_models + + "/model_eynollah_reading_order_20250824" + #"/model_mb_ro_aug_ens_11" + #"/model_step_3200000_mb_ro" + #"/model_ens_reading_order_machine_based" + #"/model_mb_ro_aug_ens_8" + #"/model_ens_reading_order_machine_based" + ) #"/modelens_12sp_elay_0_3_4__3_6_n" #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" @@ -311,20 +327,15 @@ class Eynollah: if self.ocr and self.tr: self.model_ocr_dir = dir_models + "/model_eynollah_ocr_trocr_20250919" elif self.ocr and not self.tr: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250904" + self.model_ocr_dir = 
dir_models + "/model_eynollah_ocr_cnnrnn_20250930" if self.tables: if self.light_version: self.model_table_dir = dir_models + "/modelens_table_0t4_201124" else: self.model_table_dir = dir_models + "/eynollah-tables_20210319" - - + t_start = time.time() - # for parallelization of CPU-intensive tasks: - self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) - atexit.register(self.executor.shutdown) - # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) @@ -375,11 +386,9 @@ class Eynollah: self.b_s_ocr = 8 else: self.b_s_ocr = int(batch_size_ocr) - with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: characters = json.load(config_file) - AUTOTUNE = tf.data.AUTOTUNE @@ -396,6 +405,26 @@ class Eynollah: self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)") + def __del__(self): + if hasattr(self, 'executor') and getattr(self, 'executor'): + self.executor.shutdown() + for model_name in ['model_page', + 'model_classifier', + 'model_bin', + 'model_enhancement', + 'model_region', + 'model_region_1_2', + 'model_region_p2', + 'model_region_fl_np', + 'model_region_fl', + 'model_textline', + 'model_reading_order', + 'model_table', + 'model_ocr', + 'processor']: + if hasattr(self, model_name) and getattr(self, model_name): + delattr(self, model_name) + def cache_images(self, image_filename=None, image_pil=None, dpi=None): ret = {} t_c0 = time.time() @@ -836,7 +865,9 @@ class Eynollah: self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False, thresholding_for_fl_light_version=False, threshold_art_class_textline=0.1): + thresholding_for_artificial_class_in_light_version=False, 
+ thresholding_for_fl_light_version=False, + threshold_art_class_textline=0.1): self.logger.debug("enter do_prediction") img_height_model = model.layers[-1].output_shape[1] @@ -1250,7 +1281,9 @@ class Eynollah: self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False, threshold_art_class_textline=0.1, threshold_art_class_layout=0.1): + thresholding_for_artificial_class_in_light_version=False, + threshold_art_class_textline=0.1, + threshold_art_class_layout=0.1): self.logger.debug("enter do_prediction_new_concept") img_height_model = model.layers[-1].output_shape[1] @@ -1380,7 +1413,8 @@ class Eynollah: for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): seg_in_art = seg_art[indexer_inside_batch] index_y_u_in = list_y_u[indexer_inside_batch] @@ -1400,7 +1434,8 @@ class Eynollah: label_p_pred[0, 0:-margin or None, 0:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + 0:index_x_u_in - margin, 1] = \ seg_in_art[0:-margin or None, @@ -1417,7 +1452,8 @@ class Eynollah: label_p_pred[0, margin:, margin:, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + margin:index_x_u_in - 0, 1] = \ seg_in_art[margin:, 
@@ -1435,7 +1471,8 @@ class Eynollah: 0:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + 0:index_x_u_in - margin, 1] = \ seg_in_art[margin:, @@ -1452,7 +1489,8 @@ class Eynollah: label_p_pred[0, 0:-margin or None, margin:, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - 0, 1] = \ seg_in_art[0:-margin or None, @@ -1469,7 +1507,8 @@ class Eynollah: label_p_pred[0, margin:-margin or None, 0:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + 0:index_x_u_in - margin, 1] = \ seg_in_art[margin:-margin or None, @@ -1485,7 +1524,8 @@ class Eynollah: label_p_pred[0, margin:-margin or None, margin:, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - 0, 1] = \ seg_in_art[margin:-margin or None, @@ -1501,7 +1541,8 @@ class Eynollah: label_p_pred[0, 0:-margin or None, margin:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if 
(thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - margin, 1] = \ seg_in_art[0:-margin or None, @@ -1517,7 +1558,8 @@ class Eynollah: label_p_pred[0, margin:, margin:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + margin:index_x_u_in - margin, 1] = \ seg_in_art[margin:, @@ -1533,7 +1575,8 @@ class Eynollah: label_p_pred[0, margin:-margin or None, margin:-margin or None, 1] - if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + if (thresholding_for_artificial_class_in_light_version or + thresholding_for_some_classes_in_light_version): prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - margin, 1] = \ seg_in_art[margin:-margin or None, @@ -1682,7 +1725,10 @@ class Eynollah: else: img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8) - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3, thresholding_for_fl_light_version=thresholding_for_fl_light_version) + prediction_regions = self.do_prediction(patches, img, model_region, + marginal_of_patch_percent=0.1, + n_batch_inference=3, + thresholding_for_fl_light_version=thresholding_for_fl_light_version) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions @@ -1810,8 +1856,7 @@ class Eynollah: return sorted_textlines - - def get_slopes_and_deskew_new_light2(self, contours, contours_par, 
textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) M_main_tot = [cv2.moments(polygons_of_textlines[j]) @@ -1836,55 +1881,69 @@ class Eynollah: cy_textline_in = [cy_main_tot[ind] for ind in indexes_in] w_h_textlines_in = [w_h_textlines[ind][0] / float(w_h_textlines[ind][1]) for ind in indexes_in] - textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins, cx_textline_in, cy_textline_in, w_h_textlines_in) + textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins, + cx_textline_in, + cy_textline_in, + w_h_textlines_in) all_found_textline_polygons.append(textlines_ins)#[::-1]) slopes.append(slope_deskew) - _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) + crop_coor = box2rect(boxes[index]) all_box_coord.append(crop_coor) - return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes + return (all_found_textline_polygons, + boxes, + contours, + contours_par, + all_box_coord, + np.array(range(len(contours_par))), + slopes) - def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") - results = self.executor.map(partial(do_work_of_slopes_new_light, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew,textline_light=self.textline_light, - logger=self.logger,), - boxes, contours, contours_par, range(len(contours_par))) + with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: + results = 
self.executor.map(partial(do_work_of_slopes_new_light, + textline_mask_tot_ea=textline_mask_tot_shared, + slope_deskew=slope_deskew, + textline_light=self.textline_light, + logger=self.logger,), + boxes, contours, contours_par, range(len(contours_par))) + results = list(results) # exhaust prior to release #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) - def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") - results = self.executor.map(partial(do_work_of_slopes_new, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - logger=self.logger, - plotter=self.plotter,), - boxes, contours, contours_par, range(len(contours_par))) + with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: + results = self.executor.map(partial(do_work_of_slopes_new, + textline_mask_tot_ea=textline_mask_tot_shared, + slope_deskew=slope_deskew, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + logger=self.logger, + plotter=self.plotter,), + boxes, contours, contours_par, range(len(contours_par))) + results = list(results) # exhaust prior to release #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) - def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): + def get_slopes_and_deskew_new_curved(self, contours, contours_par, 
textline_mask_tot, boxes, + mask_texts_only, num_col, scale_par, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") - results = self.executor.map(partial(do_work_of_slopes_new_curved, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - mask_texts_only=mask_texts_only, + with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: + with share_ndarray(mask_texts_only) as mask_texts_only_shared: + results = self.executor.map(partial(do_work_of_slopes_new_curved, + textline_mask_tot_ea=textline_mask_tot_shared, + mask_texts_only=mask_texts_only_shared, num_col=num_col, scale_par=scale_par, slope_deskew=slope_deskew, @@ -1893,6 +1952,7 @@ class Eynollah: logger=self.logger, plotter=self.plotter,), boxes, contours, contours_par, range(len(contours_par))) + results = list(results) # exhaust prior to release #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_curved") return tuple(zip(*results)) @@ -1906,10 +1966,11 @@ class Eynollah: img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - prediction_textline = self.do_prediction( - use_patches, img, self.model_textline, - marginal_of_patch_percent=0.15, n_batch_inference=3, - thresholding_for_artificial_class_in_light_version=self.textline_light, threshold_art_class_textline=self.threshold_art_class_textline) + prediction_textline = self.do_prediction(use_patches, img, self.model_textline, + marginal_of_patch_percent=0.15, + n_batch_inference=3, + thresholding_for_artificial_class_in_light_version=self.textline_light, + threshold_art_class_textline=self.threshold_art_class_textline) #if not self.textline_light: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) @@ -2001,16 
+2062,18 @@ class Eynollah: boxes_sub_new = [] poly_sub = [] for mv in range(len(boxes_per_process)): - crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) + crop_img, _ = crop_image_inside_box(boxes_per_process[mv], + np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) crop_img = crop_img[:, :, 0] crop_img = cv2.erode(crop_img, KERNEL, iterations=2) try: textline_con, hierarchy = return_contours_of_image(crop_img) - textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, + max_area=1, min_area=0.0008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) crop_img[crop_img > 0] = 1 - slope_corresponding_textregion = return_deskew_slop_old_mp(crop_img, sigma_des, + slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des, logger=self.logger, plotter=self.plotter) except Exception as why: self.logger.error(why) @@ -2065,9 +2128,9 @@ class Eynollah: mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_images_only=(prediction_regions_org[:,:] ==2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -2106,7 +2169,7 @@ class Eynollah: ##polygons_of_images_fin.append(ploy_img_ind) box = 
cv2.boundingRect(ploy_img_ind) - _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + page_coord_img = box2rect(box) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -2120,7 +2183,7 @@ class Eynollah: if h < 150 or w < 150: pass else: - _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + page_coord_img = box2rect(box) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -2131,7 +2194,13 @@ class Eynollah: [page_coord_img[2], page_coord_img[1]]])) self.logger.debug("exit get_regions_extract_images_only") - return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page + return (text_regions_p_true, + erosion_hurts, + polygons_seplines, + polygons_of_images_fin, + image_page, + page_coord, + cont_page) def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") @@ -2189,7 +2258,8 @@ class Eynollah: #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) - self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized))) + self.logger.debug("detecting textlines on %s with %d colors", + str(img_resized.shape), len(np.unique(img_resized))) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) @@ -2206,13 +2276,15 @@ class Eynollah: img_resized.shape[1], img_resized.shape[0], num_col_classifier) prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=1, - thresholding_for_some_classes_in_light_version=True, 
threshold_art_class_layout=self.threshold_art_class_layout) + thresholding_for_some_classes_in_light_version=True, + threshold_art_class_layout=self.threshold_art_class_layout) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1])) prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept( False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, - thresholding_for_artificial_class_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout) + thresholding_for_artificial_class_in_light_version=True, + threshold_art_class_layout=self.threshold_art_class_layout) ys = slice(*self.page_coord[0:2]) xs = slice(*self.page_coord[2:4]) prediction_regions_org[ys, xs] = prediction_regions_page @@ -2225,8 +2297,11 @@ class Eynollah: img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=2, - thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout) - ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + thresholding_for_some_classes_in_light_version=True, + threshold_art_class_layout=self.threshold_art_class_layout) + ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, + ###n_batch_inference=3, + ###thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2248,31 +2323,31 @@ class Eynollah: mask_images_only=(prediction_regions_org[:,:] ==2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + polygons_seplines, hir_seplines = 
return_contours_of_image(mask_lines_only) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1)) + test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() #for jv in range(1): - #print(jv, hir_lines_xml[0][232][3]) + #print(jv, hir_seplines[0][232][3]) #test_khat = np.zeros(prediction_regions_org.shape) - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_seplines[232]], color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() - polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() #sys.exit() polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) + ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) @@ -2288,7 +2363,13 @@ class Eynollah: #plt.show() #print("inside 4 ", time.time()-t_in) self.logger.debug("exit get_regions_light_v") - return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix, polygons_of_only_texts + return (text_regions_p_true, + erosion_hurts, + polygons_seplines, + textline_mask_tot_ea, + img_bin, + confidence_matrix, + 
polygons_of_only_texts) else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) self.logger.debug("exit get_regions_light_v") @@ -2371,9 +2452,9 @@ class Eynollah: mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -2385,7 +2466,7 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) self.logger.debug("exit get_regions_from_xy_2models") - return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_only_texts + return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_only_texts except: if self.input_binary: prediction_bin = np.copy(img_org) @@ -2408,21 +2489,17 @@ class Eynollah: #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) #prediction_regions_org = self.do_prediction(True, img, self.model_region) - #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - #prediction_regions_org = prediction_regions_org[:,:,0] - #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 - mask_lines_only = (prediction_regions_org == 3)*1 mask_texts_only = (prediction_regions_org == 1)*1 mask_images_only= (prediction_regions_org == 2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - 
polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -2435,7 +2512,7 @@ class Eynollah: erosion_hurts = True self.logger.debug("exit get_regions_from_xy_2models") - return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_only_texts + return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_only_texts def do_order_of_regions_full_layout( self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -2834,7 +2911,8 @@ class Eynollah: contours_new.append(contours_sep[ji]) if num_col_classifier>=2: only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], color=(1,1,1)) + only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, + pts=[contours_sep[ji]], color=(1,1,1)) table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. 
* table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in_in1') @@ -2919,9 +2997,11 @@ class Eynollah: contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) if indiv==pixel_table: - main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = 0.001) + main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, + max_area=1, min_area=0.001) else: - main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = min_area) + main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, + max_area=1, min_area=min_area) img_comm = cv2.fillPoly(img_comm, pts = main_contours, color = (indiv, indiv, indiv)) img_comm = img_comm.astype(np.uint8) @@ -2956,8 +3036,14 @@ class Eynollah: y_min_main_line ,y_max_main_line=find_features_of_contours(contours_line) y_min_main_tab ,y_max_main_tab=find_features_of_contours(contours_tab) - cx_tab_m_text,cy_tab_m_text ,x_min_tab_m_text , x_max_tab_m_text, y_min_tab_m_text ,y_max_tab_m_text, _= find_new_features_of_contours(contours_table_m_text) - cx_tabl,cy_tabl ,x_min_tabl , x_max_tabl, y_min_tabl ,y_max_tabl,_= find_new_features_of_contours(contours_tab) + (cx_tab_m_text, cy_tab_m_text, + x_min_tab_m_text, x_max_tab_m_text, + y_min_tab_m_text, y_max_tab_m_text, + _) = find_new_features_of_contours(contours_table_m_text) + (cx_tabl, cy_tabl, + x_min_tabl, x_max_tabl, + y_min_tabl, y_max_tabl, + _) = find_new_features_of_contours(contours_tab) if len(y_min_main_tab )>0: y_down_tabs=[] @@ -2967,9 +3053,15 @@ class Eynollah: y_down_tab=[] y_up_tab=[] for i_l in range(len(y_min_main_line)): - if y_min_main_tab[i_t]>y_min_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l] and y_min_main_tab[i_t]>y_max_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l]: + if (y_min_main_tab[i_t] > y_min_main_line[i_l] and + 
y_max_main_tab[i_t] > y_min_main_line[i_l] and + y_min_main_tab[i_t] > y_max_main_line[i_l] and + y_max_main_tab[i_t] > y_min_main_line[i_l]): pass - elif y_min_main_tab[i_t] self.num_col_upper: - num_col_classifier = self.num_col_upper - - elif self.num_col_lower and not self.num_col_upper: - if num_col_classifier < self.num_col_lower: - num_col_classifier = self.num_col_lower - - elif self.num_col_upper and not self.num_col_lower: - if num_col_classifier > self.num_col_upper: - num_col_classifier = self.num_col_upper - - else: - pass - + num_col_classifier = min(self.num_col_upper or num_col_classifier, + max(self.num_col_lower or num_col_classifier, + num_col_classifier)) except Exception as why: self.logger.error(why) num_col = None @@ -3288,7 +3363,8 @@ class Eynollah: else: self.get_image_and_scales(img_org, img_res, scale) if self.allow_scaling: - img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) + img_org, img_res, is_image_enhanced = \ + self.resize_image_with_column_classifier(is_image_enhanced, img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified @@ -3297,7 +3373,10 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) + textline_mask_tot_ea, _ = self.textline_contours(image_page, True, + scaler_h_textline, + scaler_w_textline, + num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3307,20 +3386,18 @@ class Eynollah: def run_deskew(self, textline_mask_tot_ea): #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') - slope_deskew = return_deskew_slop_old_mp(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 
2, 30, True, - logger=self.logger, plotter=self.plotter) - slope_first = 0 - + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, + map=self.executor.map, logger=self.logger, plotter=self.plotter) if self.plotter: self.plotter.save_deskewed_image(slope_deskew) self.logger.info("slope_deskew: %.2f°", slope_deskew) - return slope_deskew, slope_first + return slope_deskew def run_marginals( - self, image_page, textline_mask_tot_ea, mask_images, mask_lines, + self, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): - image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] + textline_mask_tot = textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 text_regions_p_1[mask_lines[:, :] == 1] = 3 @@ -3338,10 +3415,7 @@ class Eynollah: except Exception as e: self.logger.error("exception %s", e) - if self.plotter: - self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) - self.plotter.save_plot_of_layout_main(text_regions_p, image_page) - return textline_mask_tot, text_regions_p, image_page_rotated + return textline_mask_tot, text_regions_p def run_boxes_no_full_layout( self, image_page, textline_mask_tot, text_regions_p, @@ -3358,7 +3432,9 @@ class Eynollah: regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 - regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators = (text_regions_p[:, :] == 1) * 1 + # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 + #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) #print(time.time()-t_0_box,'time box in 1') if self.tables: 
regions_without_separators[table_prediction ==1 ] = 1 @@ -3427,7 +3503,8 @@ class Eynollah: pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( - text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, + peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction_n, 10, num_col_classifier) @@ -3435,7 +3512,8 @@ class Eynollah: img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, + text_regions_p.shape[0], text_regions_p.shape[1]) #print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) @@ -3493,12 +3571,19 @@ class Eynollah: text_regions_p[:,:][table_prediction[:,:]==1] = 10 img_revised_tab = text_regions_p[:,:] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ - rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, + table_prediction, slope_deskew) - text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) - textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) - table_prediction_n = 
resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) + text_regions_p_1_n = resize_image(text_regions_p_1_n, + text_regions_p.shape[0], + text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d, + text_regions_p.shape[0], + text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n, + text_regions_p.shape[0], + text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 @@ -3513,12 +3598,19 @@ class Eynollah: else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ - rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, + table_prediction, slope_deskew) - text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) - textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) - table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) + text_regions_p_1_n = resize_image(text_regions_p_1_n, + text_regions_p.shape[0], + text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d, + text_regions_p.shape[0], + text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n, + text_regions_p.shape[0], + text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 @@ -3577,7 +3669,8 @@ class Eynollah: pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( - text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + 
text_regions_p_tables, boxes_d, 0, splitter_y_new_d, + peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( @@ -3586,8 +3679,9 @@ class Eynollah: img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, + text_regions_p.shape[0], + text_regions_p.shape[1]) if np.abs(slope_deskew) < 0.13: img_revised_tab = np.copy(img_revised_tab2[:,:,0]) @@ -3658,7 +3752,8 @@ class Eynollah: ##else: ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) - ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) + ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, + ### regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4 @@ -3721,7 +3816,10 @@ class Eynollah: min_cont_size_to_be_dilated = 10 if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: - cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent) + (cx_conts, cy_conts, + x_min_conts, x_max_conts, + y_min_conts, y_max_conts, + _) = find_new_features_of_contours(contours_only_text_parent) args_cont_located = np.array(range(len(contours_only_text_parent))) diff_y_conts = np.abs(y_max_conts[:]-y_min_conts) @@ -3736,15 +3834,31 @@ class Eynollah: args_cont_located_excluded = args_cont_located[diff_x_ratio>=1.3] args_cont_located_included = args_cont_located[diff_x_ratio<1.3] - 
contours_only_text_parent_excluded = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]>=1.3]#contours_only_text_parent[diff_x_ratio>=1.3] - contours_only_text_parent_included = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]<1.3]#contours_only_text_parent[diff_x_ratio<1.3] + contours_only_text_parent_excluded = [contours_only_text_parent[ind] + #contours_only_text_parent[diff_x_ratio>=1.3] + for ind in range(len(contours_only_text_parent)) + if diff_x_ratio[ind]>=1.3] + contours_only_text_parent_included = [contours_only_text_parent[ind] + #contours_only_text_parent[diff_x_ratio<1.3] + for ind in range(len(contours_only_text_parent)) + if diff_x_ratio[ind]<1.3] - - cx_conts_excluded = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]>=1.3]#cx_conts[diff_x_ratio>=1.3] - cx_conts_included = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]<1.3]#cx_conts[diff_x_ratio<1.3] - - cy_conts_excluded = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]>=1.3]#cy_conts[diff_x_ratio>=1.3] - cy_conts_included = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]<1.3]#cy_conts[diff_x_ratio<1.3] + cx_conts_excluded = [cx_conts[ind] + #cx_conts[diff_x_ratio>=1.3] + for ind in range(len(cx_conts)) + if diff_x_ratio[ind]>=1.3] + cx_conts_included = [cx_conts[ind] + #cx_conts[diff_x_ratio<1.3] + for ind in range(len(cx_conts)) + if diff_x_ratio[ind]<1.3] + cy_conts_excluded = [cy_conts[ind] + #cy_conts[diff_x_ratio>=1.3] + for ind in range(len(cy_conts)) + if diff_x_ratio[ind]>=1.3] + cy_conts_included = [cy_conts[ind] + #cy_conts[diff_x_ratio<1.3] + for ind in range(len(cy_conts)) + if diff_x_ratio[ind]<1.3] #print(diff_x_ratio, 'ratio') text_regions_p = text_regions_p.astype('uint8') @@ -3766,7 +3880,10 @@ class Eynollah: contours_only_dilated, hir_on_text_dilated = 
return_contours_of_image(text_regions_p_textregions_dilated) contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated) - indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = self.return_indexes_of_contours_loctaed_inside_another_list_of_contours(contours_only_dilated, contours_only_text_parent_included, cx_conts_included, cy_conts_included, args_cont_located_included) + indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = \ + self.return_indexes_of_contours_located_inside_another_list_of_contours( + contours_only_dilated, contours_only_text_parent_included, + cx_conts_included, cy_conts_included, args_cont_located_included) if len(args_cont_located_excluded)>0: @@ -3779,7 +3896,7 @@ class Eynollah: flattened_array = np.concatenate([arr.ravel() for arr in array_list]) #print(len( np.unique(flattened_array)), 'indexes_of_located_cont uniques') - missing_textregions = list( set(np.array(range(len(contours_only_text_parent))) ) - set(np.unique(flattened_array)) ) + missing_textregions = list( set(range(len(contours_only_text_parent))) - set(flattened_array) ) #print(missing_textregions, 'missing_textregions') for ind in missing_textregions: @@ -3899,12 +4016,13 @@ class Eynollah: region_with_curr_order = ordered[ind] if region_with_curr_order < len(contours_only_dilated): if np.isscalar(indexes_of_located_cont[region_with_curr_order]): - org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]] + org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]]) else: arg_sort_located_cont = np.argsort(center_y_coordinates_of_located[region_with_curr_order]) - org_contours_indexes = org_contours_indexes + list(np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) ##org_contours_indexes + list ( + org_contours_indexes.extend( + 
np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) else: - org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]] + org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]]) region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] return org_contours_indexes, region_ids @@ -3912,6 +4030,183 @@ class Eynollah: region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] return ordered, region_ids + def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.2*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. + common_window ) + + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>70: + print(len(peaks_real), 'len(peaks_real)') + peaks_real = peaks_real[(peaks_realwidth1)] + + arg_sort = np.argsort(sum_smoothed[peaks_real]) + arg_sort4 =arg_sort[::-1][:4] + peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + + argsort_sorted = np.argsort(peaks_sort_4) + first_4_sorted = peaks_sort_4[argsort_sorted] + y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] + #print(first_4_sorted,'first_4_sorted') + + arg_sortnew = np.argsort(y_4_sorted) + peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] ) + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1]) + #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peaks_final[0], peaks_final[1] + else: + pass + + def return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + self, peaks_real, sum_smoothed, start_split, end_split): + + peaks_real = peaks_real[(peaks_realstart_split)] + + arg_sort = 
np.argsort(sum_smoothed[peaks_real]) + arg_sort4 =arg_sort[::-1][:4] + peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + argsort_sorted = np.argsort(peaks_sort_4) + + first_4_sorted = peaks_sort_4[argsort_sorted] + y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] + #print(first_4_sorted,'first_4_sorted') + + arg_sortnew = np.argsort(y_4_sorted) + peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) + return peaks_final[0] + + def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.15*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. + common_window ) + mid = int(width/2.) + + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + if len(peaks_real)>70: + peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + peaks_real, sum_smoothed, width1, mid+2) + peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + peaks_real, sum_smoothed, mid-2, width2) + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peak_start, peak_start], [0, height-1]) + #plt.plot([peak_end, peak_end], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peak_start, peak_end + else: + pass + + def return_ocr_of_textline_without_common_section( + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + #width1 = int ( width/2. 
- common_window ) + #width2 = int ( width/2. + common_window ) + + split_point = return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image) + if split_point: + image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + + #pixel_values1 = processor(image1, return_tensors="pt").pixel_values + #pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values + generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) + generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + #print(generated_text_merged,'generated_text_merged') + + #generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + #generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + + #generated_text = generated_text1 + ' ' + generated_text2 + generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] + + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + else: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + #print(generated_text,'generated_text') + #print('########################################') + return generated_text + + def return_ocr_of_textline( + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + 
generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + #width1 = int ( width/2. - common_window ) + #width2 = int ( width/2. + common_window ) + + try: + width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) + + image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) + + pixel_values1 = processor(image1, return_tensors="pt").pixel_values + pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + + match = sq(None, generated_text1, generated_text2).find_longest_match( + 0, len(generated_text1), 0, len(generated_text2)) + generated_text = generated_text1 + generated_text2[match.b+match.size:] + except: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text + def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] @@ -3940,313 +4235,8 @@ class Eynollah: return x_differential_new - def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): - 
#print(all_found_textline_polygons) - for j in range(len(all_found_textline_polygons)): - for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] - area = cv2.contourArea(con_ind) - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 0.1) - y_differential = gaussian_filter1d(y_differential, 0.1) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.12) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.12) - - if dilation_m1>8: - dilation_m1 = 8 - if dilation_m1<6: - dilation_m1 = 6 - #print(dilation_m1, 'dilation_m1') - dilation_m1 = 6 - dilation_m2 = int(dilation_m1/2.) 
+1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - #print(results,'results') - results[results==0] = 1 - - diff_result = np.diff(results) - - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - #indices_2 = indices_2[1:] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = 
con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons - - def dilate_textregions_contours(self, all_found_textline_polygons): - #print(all_found_textline_polygons) - for j in range(len(all_found_textline_polygons)): - con_ind = all_found_textline_polygons[j] - #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') - area = cv2.contourArea(con_ind) - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 0.1) - y_differential = gaussian_filter1d(y_differential, 0.1) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.12) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.12) - - if dilation_m1>8: - dilation_m1 = 8 - if dilation_m1<6: - dilation_m1 = 6 - #print(dilation_m1, 'dilation_m1') - dilation_m1 = 4#6 - dilation_m2 = int(dilation_m1/2.) 
+1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - #print(results,'results') - results[results==0] = 1 - - diff_result = np.diff(results) - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - #indices_2 = indices_2[1:] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = 
con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons - - def dilate_textline_contours(self, all_found_textline_polygons): - for j in range(len(all_found_textline_polygons)): - for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] - area = cv2.contourArea(con_ind) - - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 3) - y_differential = gaussian_filter1d(y_differential, 3) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.35) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.35) - - if dilation_m1>12: - dilation_m1 = 12 - if dilation_m1<4: - dilation_m1 = 4 - #print(dilation_m1, 'dilation_m1') - dilation_m2 = int(dilation_m1/2.) 
+1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - results[results==0] = 1 - - diff_result = np.diff(results) - - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - 
for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons - - def filter_contours_inside_a_bigger_one(self,contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"): + def filter_contours_inside_a_bigger_one(self, contours, contours_d_ordered, image, + marginal_cnts=None, type_contour="textregion"): if type_contour=="textregion": areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] area_tot = image.shape[0]*image.shape[1] @@ -4266,7 +4256,10 @@ class Eynollah: results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big] if marginal_cnts: - results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) + results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], + (cx_main[ind_small], + cy_main[ind_small]), + False) for ind in range(len(marginal_cnts))] results_marginal = np.array(results_marginal) @@ -4321,7 +4314,10 @@ class Eynollah: args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest] if len(args_with_bigger_area)>0: - results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) + results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], + (cx_main_tot[ij], + cy_main_tot[ij]), + False) for ind in args_with_bigger_area ] results = np.array(results) if np.any(results==1): @@ -4333,14 +4329,16 @@ class Eynollah: textregion_index_to_del = np.array(textregion_index_to_del) textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del) for ind_u_a_trs in 
np.unique(textregion_index_to_del): - textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs] + textline_in_textregion_index_to_del_ind = \ + textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs] textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] for ittrd in textline_in_textregion_index_to_del_ind: contours[ind_u_a_trs].pop(ittrd) return contours - def return_indexes_of_contours_loctaed_inside_another_list_of_contours(self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc): + def return_indexes_of_contours_located_inside_another_list_of_contours( + self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc): indexes_of_located_cont = [] center_x_coordinates_of_located = [] center_y_coordinates_of_located = [] @@ -4354,7 +4352,8 @@ class Eynollah: for ind in range(len(cy_main_loc)) ] results = np.array(results) indexes_in = np.where((results == 0) | (results == 1)) - indexes = indexes_loc[indexes_in]# [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1)) + # [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1)) + indexes = indexes_loc[indexes_in] indexes_of_located_cont.append(indexes) center_x_coordinates_of_located.append(np.array(cx_main_loc)[indexes_in] ) @@ -4364,7 +4363,9 @@ class Eynollah: def filter_contours_without_textline_inside( - self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions): + self, contours, text_con_org, contours_textline, + contours_only_text_parent_d_ordered, + conf_contours_textregions): ###contours_txtline_of_all_textregions = [] ###for jj in range(len(contours_textline)): ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] @@ -4382,144 +4383,32 @@ class Eynollah: ###contours_with_textline = [] ###for ind_tr, con_tr in enumerate(contours): - 
###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) + ###results = [cv2.pointPolygonTest(con_tr, + ### (cx_main_textline[index_textline_con], + ### cy_main_textline[index_textline_con]), + ### False) ### for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] ###results = np.array(results) ###if np.any(results==1): ###contours_with_textline.append(con_tr) - textregion_index_to_del = [] + textregion_index_to_del = set() for index_textregion, textlines_textregion in enumerate(contours_textline): - if len(textlines_textregion)==0: - textregion_index_to_del.append(index_textregion) + if len(textlines_textregion) == 0: + textregion_index_to_del.add(index_textregion) + def filterfun(lis): + if len(lis) == 0: + return [] + if len(textregion_index_to_del) == 0: + return lis + return list(np.delete(lis, list(textregion_index_to_del))) - uniqe_args_trs = np.unique(textregion_index_to_del) - uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] - - for ind_u_a_trs in uniqe_args_trs_sorted: - conf_contours_textregions.pop(ind_u_a_trs) - contours.pop(ind_u_a_trs) - contours_textline.pop(ind_u_a_trs) - text_con_org.pop(ind_u_a_trs) - if len(contours_only_text_parent_d_ordered) > 0: - contours_only_text_parent_d_ordered.pop(ind_u_a_trs) - - return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) - - def dilate_textlines(self, all_found_textline_polygons): - for j in range(len(all_found_textline_polygons)): - for i in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][i] - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - 
- if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: - x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) - mult = x_biger_than_x*x_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if y_differential[0]==0: - y_differential[0] = 0.1 - if y_differential[-1]==0: - y_differential[-1]= 0.1 - y_differential = [y_differential[ind] if y_differential[ind] != 0 - else 0.5 * (y_differential[ind-1] + y_differential[ind+1]) - for ind in range(len(y_differential))] - - if y_differential[0]==0.1: - y_differential[0] = y_differential[1] - if y_differential[-1]==0.1: - y_differential[-1] = y_differential[-2] - y_differential.append(y_differential[0]) - - y_differential = [-1 if y_differential[ind] < 0 else 1 - for ind in range(len(y_differential))] - y_differential = self.return_it_in_two_groups(y_differential) - y_differential = np.array(y_differential) - - con_scaled = con_ind*1 - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 - con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 - - try: - con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 - con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 - con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 - - try: - con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 - con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 - except: - pass - - else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - mult = y_biger_than_x*y_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if x_differential[0]==0: - x_differential[0] = 0.1 - if x_differential[-1]==0: - x_differential[-1]= 0.1 - x_differential = [x_differential[ind] if x_differential[ind] != 0 - else 0.5 * (x_differential[ind-1] + 
x_differential[ind+1]) - for ind in range(len(x_differential))] - - if x_differential[0]==0.1: - x_differential[0] = x_differential[1] - if x_differential[-1]==0.1: - x_differential[-1] = x_differential[-2] - x_differential.append(x_differential[0]) - - x_differential = [-1 if x_differential[ind] < 0 else 1 - for ind in range(len(x_differential))] - x_differential = self.return_it_in_two_groups(x_differential) - x_differential = np.array(x_differential) - - con_scaled = con_ind*1 - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 - con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 - - try: - con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 - con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 - con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 - - try: - con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 - con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 - except: - pass - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] - - return all_found_textline_polygons + return (filterfun(contours), + filterfun(text_con_org), + filterfun(conf_contours_textregions), + filterfun(contours_textline), + filterfun(contours_only_text_parent_d_ordered), + np.arange(len(contours) - len(textregion_index_to_del))) def delete_regions_without_textlines( self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, @@ -4550,7 +4439,9 @@ class Eynollah: return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort) - def 
separate_marginals_to_left_and_right_and_order_from_top_to_down(self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width): + def separate_marginals_to_left_and_right_and_order_from_top_to_down( + self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, + slopes_marginals, mid_point_of_page_width): cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours( polygons_of_marginals) @@ -4560,8 +4451,10 @@ class Eynollah: poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] ) poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] ) - all_found_textline_polygons_marginals_left = list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] ) - all_found_textline_polygons_marginals_right = list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] ) + all_found_textline_polygons_marginals_left = \ + list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] ) + all_found_textline_polygons_marginals_right = \ + list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] ) all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] ) all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] ) @@ -4572,20 +4465,38 @@ class Eynollah: cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width] cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width] - ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), key=lambda x: x[0])] - ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), key=lambda x: x[0])] + ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), + key=lambda x: 
x[0])] + ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), + key=lambda x: x[0])] - ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, all_found_textline_polygons_marginals_left), key=lambda x: x[0])] - ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, all_found_textline_polygons_marginals_right), key=lambda x: x[0])] + ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, + all_found_textline_polygons_marginals_left), + key=lambda x: x[0])] + ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, + all_found_textline_polygons_marginals_right), + key=lambda x: x[0])] - ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, all_box_coord_marginals_left), key=lambda x: x[0])] - ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, all_box_coord_marginals_right), key=lambda x: x[0])] + ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, + all_box_coord_marginals_left), + key=lambda x: x[0])] + ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, + all_box_coord_marginals_right), + key=lambda x: x[0])] - ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), key=lambda x: x[0])] - ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), key=lambda x: x[0])] + ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), + key=lambda x: x[0])] + ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), + key=lambda x: x[0])] - return ordered_left_marginals, ordered_right_marginals, ordered_left_marginals_textline, ordered_right_marginals_textline, ordered_left_marginals_bbox, ordered_right_marginals_bbox, ordered_left_slopes_marginals, ordered_right_slopes_marginals 
- + return (ordered_left_marginals, + ordered_right_marginals, + ordered_left_marginals_textline, + ordered_right_marginals_textline, + ordered_left_marginals_bbox, + ordered_right_marginals_bbox, + ordered_left_slopes_marginals, + ordered_right_slopes_marginals) def run(self, overwrite: bool = False, @@ -4667,12 +4578,14 @@ class Eynollah: def run_single(self): t0 = time.time() - self.logger.info(f"Processing file: {self.writer.image_filename}") + self.logger.info(f"Processing file: {self.writer.image_filename}") self.logger.info("Step 1/5: Image Enhancement") - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = \ + self.run_enhancement(self.light_version) - self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns") + self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, " + f"{self.dpi} DPI, {num_col_classifier} columns") if is_image_enhanced: self.logger.info("Enhancement applied") @@ -4683,7 +4596,8 @@ class Eynollah: if self.extract_only_images: self.logger.info("Step 2/5: Image Extraction Mode") - text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ + text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, \ + image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], @@ -4715,21 +4629,20 @@ class Eynollah: M_main_tot = [cv2.moments(all_found_textline_polygons[j]) for j in range(len(all_found_textline_polygons))] - w_h_textlines = [cv2.boundingRect(all_found_textline_polygons[j])[2:] for j in range(len(all_found_textline_polygons))] + w_h_textlines = [cv2.boundingRect(all_found_textline_polygons[j])[2:] + for j in 
range(len(all_found_textline_polygons))] w_h_textlines = [w_h_textlines[j][0] / float(w_h_textlines[j][1]) for j in range(len(w_h_textlines))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted(all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines)#all_found_textline_polygons[::-1] - - all_found_textline_polygons=[ all_found_textline_polygons ] - - all_found_textline_polygons = self.dilate_textregions_contours_textline_version( - all_found_textline_polygons) + all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted( + #all_found_textline_polygons[::-1] + all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines) + all_found_textline_polygons = [ all_found_textline_polygons ] + all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline") - order_text_new = [0] slopes =[0] id_of_texts_tot =['region_0001'] @@ -4743,21 +4656,29 @@ class Eynollah: all_found_textline_polygons_marginals_right = [] all_box_coord_marginals_left = [] all_box_coord_marginals_right = [] - polygons_lines_xml = [] + polygons_seplines = [] contours_tables = [] conf_contours_textregions =[0] if self.ocr and not self.tr: gc.collect() - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, textline_light=True) + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons, self.prediction_model, + self.b_s_ocr, self.num_to_char, textline_light=True) else: ocr_all_textlines = None pcgts = 
self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order) + all_found_textline_polygons, page_coord, polygons_of_images, + polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, contours_tables, + ocr_all_textlines=ocr_all_textlines, + conf_contours_textregion=conf_contours_textregions, + skip_layout_reading_order=self.skip_layout_and_reading_order) self.logger.info("Basic processing complete") return pcgts @@ -4767,7 +4688,8 @@ class Eynollah: if self.light_version: self.logger.info("Using light version processing") - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix, polygons_text_early = \ + text_regions_p_1 ,erosion_hurts, polygons_seplines, textline_mask_tot_ea, \ + img_bin_light, confidence_matrix, polygons_text_early = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) @@ -4779,22 +4701,22 @@ class Eynollah: img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - - slope_deskew, slope_first = 
self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + slope_deskew = self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, - num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + num_col_classifier, num_column_is_classified, + erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, polygons_text_early = \ + text_regions_p_1, erosion_hurts, polygons_seplines, polygons_text_early = \ self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info(f"Textregion detection took {time.time() - t1:.1f}s") @@ -4803,7 +4725,8 @@ class Eynollah: t1 = time.time() num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, + erosion_hurts) self.logger.info(f"Graphics detection took {time.time() - t1:.1f}s") #self.logger.info('cont_page %s', cont_page) #plt.imshow(table_prediction) @@ -4824,9 +4747,7 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(image_page) self.logger.info(f"Textline detection took {time.time() - 
t1:.1f}s") t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - if np.abs(slope_deskew) > 0.01: # Only log if there is significant skew - self.logger.info(f"Applied deskew correction: {slope_deskew:.2f} degrees") + slope_deskew = self.run_deskew(textline_mask_tot_ea) self.logger.info(f"Deskewing took {time.time() - t1:.1f}s") elif num_col_classifier in (1,2): org_h_l_m = textline_mask_tot_ea.shape[0] @@ -4844,11 +4765,10 @@ class Eynollah: text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - textline_mask_tot, text_regions_p, image_page_rotated = \ - self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, + textline_mask_tot, text_regions_p = \ + self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - if image_page.shape[0]!=0 and image_page.shape[1]!=0: # if ratio of text regions to page area is smaller that 0.3, deskew angle is not aloowed to exceed 45 if ( ( text_regions_p[:,:]==1).sum() + (text_regions_p[:,:]==4).sum() ) / float(image_page.shape[0]*image_page.shape[1] ) <= 0.3 and abs(slope_deskew) > 45: @@ -4857,6 +4777,10 @@ class Eynollah: if (text_regions_p[:,:]==1).sum() == 0: text_regions_p[:,:][text_regions_p[:,:]==4] = 1 + if self.plotter: + self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) + self.plotter.save_plot_of_layout_main(text_regions_p, image_page) + self.logger.info("Step 3/5: Text Line Detection") if self.curved_line: @@ -4871,24 +4795,25 @@ class Eynollah: textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info(f"Detection of marginals took 
{time.time() - t1:.1f}s") ## birdan sora chock chakir t1 = time.time() if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ + polygons_of_images, img_revised_tab, text_regions_p_1_n, \ + textline_mask_tot_d, regions_without_separators_d, \ boxes, boxes_d, polygons_of_marginals, contours_tables = \ self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) else: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ + polygons_of_images, img_revised_tab, text_regions_p_1_n, \ + textline_mask_tot_d, regions_without_separators_d, \ regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) if self.light_version: drop_label_in_full_layout = 4 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 @@ -4952,8 +4877,10 @@ class Eynollah: areas_cnt_text_d = self.return_list_of_contours_with_desired_order( areas_cnt_text_d, index_con_parents_d) - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = \ + find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, 
_, _, _, _, _ = \ + find_new_features_of_contours(contours_only_text_parent_d) try: if len(cx_bigest_d) >= 5: cx_bigest_d_last5 = cx_bigest_d[-5:] @@ -5015,32 +4942,38 @@ class Eynollah: pcgts = self.writer.build_pagexml_full_layout( [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], - polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [], - cont_page, polygons_lines_xml) + polygons_of_marginals, polygons_of_marginals, + empty_marginals, empty_marginals, + empty_marginals, empty_marginals, + [], [], [], [], + cont_page, polygons_seplines) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, - polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], - cont_page, polygons_lines_xml, contours_tables) + polygons_of_marginals, polygons_of_marginals, + empty_marginals, empty_marginals, + empty_marginals, empty_marginals, + [], [], [], + cont_page, polygons_seplines, contours_tables) return pcgts #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours( - contours_only_text_parent) + contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one( - contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals) + contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, + marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( - contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map) - #txt_con_org = 
self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent, self.image, confidence_matrix) + #txt_con_org = dilate_textregion_contours(txt_con_org) + #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) else: txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( - contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map) + contours_only_text_parent, self.image, confidence_matrix) #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) #print("text region early 5 in %.1fs", time.time() - t0) @@ -5051,39 +4984,42 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, \ + # contours_only_text_parent, index_by_text_par_con = \ # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, # boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ + 
#slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, \ + # polygons_of_marginals, polygons_of_marginals, _ = \ # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, - # boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version( + # boxes_marginals, polygons_of_marginals, polygons_of_marginals, + # np.array(range(len(polygons_of_marginals)))) + all_found_textline_polygons = dilate_textline_contours( all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons_marginals = dilate_textline_contours( all_found_textline_polygons_marginals) - contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ + contours_only_text_parent, txt_con_org, conf_contours_textregions, \ + all_found_textline_polygons, contours_only_text_parent_d_ordered, \ index_by_text_par_con = self.filter_contours_without_textline_inside( - contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions) + contours_only_text_parent, txt_con_org, all_found_textline_polygons, + contours_only_text_parent_d_ordered, conf_contours_textregions) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ index_by_text_par_con, 
slopes = self.get_slopes_and_deskew_new_light( txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: @@ -5091,31 +5027,37 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, - image_page_rotated, boxes_text, text_only, + boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, 
polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, - image_page_rotated, boxes_marginals, text_only, + boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) mid_point_of_page_width = text_regions_p.shape[1] / 2. - polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes_marginals_left, slopes_marginals_right = self.separate_marginals_to_left_and_right_and_order_from_top_to_down(polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width) + (polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes_marginals_left, slopes_marginals_right) = \ + self.separate_marginals_to_left_and_right_and_order_from_top_to_down( + polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, + slopes_marginals, mid_point_of_page_width) #print(len(polygons_of_marginals), len(ordered_left_marginals), len(ordered_right_marginals), 'marginals ordred') if self.full_layout: @@ -5139,40 +5081,41 @@ class Eynollah: all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \ conf_contours_textregions, conf_contours_textregions_h = fun( - text_regions_p, regions_fully, contours_only_text_parent, - all_box_coord, all_found_textline_polygons, slopes, 
contours_only_text_parent_d_ordered, conf_contours_textregions) + text_regions_p, regions_fully, contours_only_text_parent, + all_box_coord, all_found_textline_polygons, + slopes, contours_only_text_parent_d_ordered, conf_contours_textregions) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + label_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, label_img) ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline( ##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, ##kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) if not self.reading_order_machine_based: - pixel_seps = 6 + label_seps = 6 if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h) + num_col_classifier, self.tables, label_seps, contours_only_text_parent_h) else: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h_d_ordered) + num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered) elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, 
self.tables, pixel_seps) + num_col_classifier, self.tables, label_seps) else: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps) + num_col_classifier, self.tables, label_seps) if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -5185,11 +5128,13 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, - num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col_classifier, erosion_hurts, self.tables, self.right2left, + logger=self.logger) else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, - num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col_classifier, erosion_hurts, self.tables, self.right2left, + logger=self.logger) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) @@ -5204,7 +5149,7 @@ class Eynollah: self.logger.info("Right-to-left mode enabled") if self.headers_off: self.logger.info("Headers ignored in reading order") - + if self.reading_order_machine_based: tror = time.time() order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( @@ -5215,7 +5160,8 @@ class Eynollah: contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, + boxes_d, textline_mask_tot_d) self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s") if self.ocr and not self.tr: @@ -5228,27 +5174,37 
@@ class Eynollah: gc.collect() if len(all_found_textline_polygons)>0: - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons, self.prediction_model, + self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: - ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons_marginals_left, self.prediction_model, + self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines_marginals_left = None if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: - ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons_marginals_right, self.prediction_model, + self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines_marginals_right = None if all_found_textline_polygons_h and len(all_found_textline_polygons)>0: - ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + 
ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons_h, self.prediction_model, + self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines_h = None if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0: - ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines( + image_page, polygons_of_drop_capitals, self.prediction_model, + self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines_drop = None else: @@ -5263,9 +5219,15 @@ class Eynollah: pcgts = self.writer.build_pagexml_full_layout( contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, - polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right, - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) + polygons_of_images, contours_tables, polygons_of_drop_capitals, + polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, ocr_all_textlines, 
ocr_all_textlines_h, + ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, + ocr_all_textlines_drop, + conf_contours_textregions, conf_contours_textregions_h) return pcgts @@ -5300,18 +5262,14 @@ class Eynollah: if self.ocr and self.tr: self.logger.info("Step 4.5/5: OCR Processing") - if torch.cuda.is_available(): self.logger.info("Using GPU acceleration") else: self.logger.info("Using CPU processing") - if self.light_version: self.logger.info("Using light version OCR") - if self.textline_light: self.logger.info("Using light text line detection for OCR") - self.logger.info("Processing text lines...") device = cuda.get_current_device() @@ -5334,7 +5292,7 @@ class Eynollah: box_ind = all_box_coord[indexing] #print(ind_poly,np.shape(ind_poly), 'ind_poly') #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) #print(ind_poly_copy) ind_poly[ind_poly<0] = 0 x, y, w, h = cv2.boundingRect(ind_poly) @@ -5356,7 +5314,8 @@ class Eynollah: img_croped = img_poly_on_img[y:y+h, x:x+w, :] #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + text_ocr = self.return_ocr_of_textline_without_common_section( + img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) ocr_textline_in_textregion.append(text_ocr) ind_tot = ind_tot +1 ocr_all_textlines.append(ocr_textline_in_textregion) @@ -5364,13 +5323,19 @@ class Eynollah: elif self.ocr and not self.tr: gc.collect() if len(all_found_textline_polygons)>0: - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( + image_page, 
all_found_textline_polygons, self.prediction_model, + self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: - ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons_marginals_left, self.prediction_model, + self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: - ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines( + image_page, all_found_textline_polygons_marginals_right, self.prediction_model, + self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None @@ -5383,11 +5348,14 @@ class Eynollah: pcgts = self.writer.build_pagexml_no_full_layout( txt_con_org, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions) - - self.logger.info(f"Output file: {self.writer.output_filename}") + 
all_found_textline_polygons, all_box_coord, polygons_of_images, + polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, contours_tables, ocr_all_textlines, + ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, + conf_contours_textregions) return pcgts @@ -5406,7 +5374,6 @@ class Eynollah_ocr: min_conf_value_of_textline_text : Optional[float]=None, logger=None, ): - self.dir_models = dir_models self.model_name = model_name self.tr_ocr = tr_ocr self.export_textline_images_and_text = export_textline_images_and_text @@ -5437,7 +5404,7 @@ class Eynollah_ocr: if self.model_name: self.model_ocr_dir = self.model_name else: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250904" + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250930" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5529,7 +5496,9 @@ class Eynollah_ocr: if child_textlines.tag.endswith("Coords"): cropped_lines_region_indexer.append(indexer_text_region) p_h=child_textlines.attrib['points'].split(' ') - textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) + textline_coords = np.array( [ [int(x.split(',')[0]), + int(x.split(',')[1]) ] + for x in p_h] ) x,y,w,h = cv2.boundingRect(textline_coords) if dir_out_image_text: @@ -5545,9 +5514,12 @@ class Eynollah_ocr: img_crop = img_poly_on_img[y:y+h, x:x+w, :] img_crop[mask_poly==0] = 255 - self.logger.debug("processing %d lines for '%s'", len(cropped_lines), nn.attrib['id']) + self.logger.debug("processing %d lines for '%s'", + len(cropped_lines), nn.attrib['id']) if h2w_ratio > 0.1: - cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, 
tr_ocr_input_height_and_width) ) + cropped_lines.append(resize_image(img_crop, + tr_ocr_input_height_and_width, + tr_ocr_input_height_and_width) ) cropped_lines_meging_indexing.append(0) indexer_b_s+=1 if indexer_b_s==self.b_s: @@ -5556,8 +5528,10 @@ class Eynollah_ocr: indexer_b_s = 0 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + generated_ids_merged = self.model_ocr.generate( + pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode( + generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged @@ -5565,7 +5539,9 @@ class Eynollah_ocr: splited_images, _ = return_textlines_split_if_needed(img_crop, None) #print(splited_images) if splited_images: - cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) + cropped_lines.append(resize_image(splited_images[0], + tr_ocr_input_height_and_width, + tr_ocr_input_height_and_width)) cropped_lines_meging_indexing.append(1) indexer_b_s+=1 @@ -5575,13 +5551,17 @@ class Eynollah_ocr: indexer_b_s = 0 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + generated_ids_merged = self.model_ocr.generate( + pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode( + generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged - cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) + 
cropped_lines.append(resize_image(splited_images[1], + tr_ocr_input_height_and_width, + tr_ocr_input_height_and_width)) cropped_lines_meging_indexing.append(-1) indexer_b_s+=1 @@ -5591,8 +5571,10 @@ class Eynollah_ocr: indexer_b_s = 0 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + generated_ids_merged = self.model_ocr.generate( + pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode( + generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged @@ -5607,8 +5589,10 @@ class Eynollah_ocr: indexer_b_s = 0 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + generated_ids_merged = self.model_ocr.generate( + pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode( + generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged @@ -5639,15 +5623,22 @@ class Eynollah_ocr: ####n_end = (i+1)*self.b_s ####imgs = cropped_lines[n_start:n_end] ####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values - ####generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) - ####generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + ####generated_ids_merged = self.model_ocr.generate( + #### pixel_values_merged.to(self.device)) + ####generated_text_merged = self.processor.batch_decode( + #### generated_ids_merged, skip_special_tokens=True) ####extracted_texts = extracted_texts + generated_text_merged del 
cropped_lines gc.collect() - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] + if cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] #print(extracted_texts_merged, len(extracted_texts_merged)) @@ -5669,7 +5660,8 @@ class Eynollah_ocr: w_bb = bb_ind[2] h_bb = bb_ind[3] - font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) ) + font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], + font.path, w_bb, int(h_bb*0.4) ) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) @@ -5687,25 +5679,27 @@ class Eynollah_ocr: #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer') #######text_by_textregion = [] #######for ind in unique_cropped_lines_region_indexer: - #######extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] - + #######ind = np.array(cropped_lines_region_indexer)==ind + #######extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] #######text_by_textregion.append(" ".join(extracted_texts_merged_un)) text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + ind = np.array(cropped_lines_region_indexer) == ind + extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] if len(extracted_texts_merged_un)>1: text_by_textregion_ind = "" next_glue = "" for indt in 
range(len(extracted_texts_merged_un)): - if extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-') or extracted_texts_merged_un[indt].endswith('¬'): - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt][:-1] + if (extracted_texts_merged_un[indt].endswith('⸗') or + extracted_texts_merged_un[indt].endswith('-') or + extracted_texts_merged_un[indt].endswith('¬')): + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1] next_glue = "" else: - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt] + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt] next_glue = " " text_by_textregion.append(text_by_textregion_ind) - else: text_by_textregion.append(" ".join(extracted_texts_merged_un)) @@ -5763,7 +5757,9 @@ class Eynollah_ocr: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order] + ###sample_order = [(id_to_order[tid], text) + ### for tid, text in zip(id_textregions, textregions_by_existing_ids) + ### if tid in id_to_order] ##ordered_texts_sample = [text for _, text in sorted(sample_order)] ##tot_page_text = ' '.join(ordered_texts_sample) @@ -5837,7 +5833,9 @@ class Eynollah_ocr: if child_textlines.tag.endswith("Coords"): cropped_lines_region_indexer.append(indexer_text_region) p_h=child_textlines.attrib['points'].split(' ') - textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) + textline_coords = np.array( [ [int(x.split(',')[0]), + int(x.split(',')[1]) ] + for x in p_h] ) x,y,w,h = cv2.boundingRect(textline_coords) @@ -5869,17 +5867,19 @@ class Eynollah_ocr: img_crop[mask_poly==0] = 255 else: - #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() 
/float(w*h) , 'didi') + # print(file_name, angle_degrees, w*h, + # mask_poly[:,:,0].sum(), + # mask_poly[:,:,0].sum() /float(w*h) , + # 'didi') if angle_degrees > 3: better_des_slope = get_orientation_moments(textline_coords) - img_crop = rotate_image_with_padding(img_crop, better_des_slope ) - + img_crop = rotate_image_with_padding(img_crop, better_des_slope) if dir_in_bin is not None: - img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) + img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope) - mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) + mask_poly = rotate_image_with_padding(mask_poly, better_des_slope) mask_poly = mask_poly.astype('uint8') #new bounding box @@ -5890,7 +5890,6 @@ class Eynollah_ocr: if not self.do_not_mask_with_textline_contour: img_crop[mask_poly==0] = 255 - if dir_in_bin is not None: img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] if not self.do_not_mask_with_textline_contour: @@ -5898,11 +5897,14 @@ class Eynollah_ocr: if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: if dir_in_bin is not None: - img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + img_crop, img_crop_bin = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly, img_crop_bin) else: - img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + img_crop, _ = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly) - else: better_des_slope = 0 if not self.do_not_mask_with_textline_contour: @@ -5915,13 +5917,18 @@ class Eynollah_ocr: else: if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: if dir_in_bin is not None: - img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + img_crop, img_crop_bin = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly, img_crop_bin) 
else: - img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + img_crop, _ = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly) if not self.export_textline_images_and_text: if w_scaled < 750:#1.5*image_width: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop, image_height, image_width) cropped_lines.append(img_fin) if abs(better_des_slope) > 45: cropped_lines_ver_index.append(1) @@ -5930,13 +5937,15 @@ class Eynollah_ocr: cropped_lines_meging_indexing.append(0) if dir_in_bin is not None: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) else: splited_images, splited_images_bin = return_textlines_split_if_needed( img_crop, img_crop_bin if dir_in_bin is not None else None) if splited_images: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images[0], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) @@ -5945,7 +5954,8 @@ class Eynollah_ocr: else: cropped_lines_ver_index.append(0) - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images[1], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) @@ -5956,13 +5966,16 @@ class Eynollah_ocr: cropped_lines_ver_index.append(0) if dir_in_bin is not None: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images_bin[0], 
image_height, image_width) cropped_lines_bin.append(img_fin) - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images_bin[1], image_height, image_width) cropped_lines_bin.append(img_fin) else: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop, image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) @@ -5972,7 +5985,8 @@ class Eynollah_ocr: cropped_lines_ver_index.append(0) if dir_in_bin is not None: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) if self.export_textline_images_and_text: @@ -5984,7 +5998,8 @@ class Eynollah_ocr: if cheild_text.tag.endswith("Unicode"): textline_text = cheild_text.text if textline_text: - base_name = os.path.join(dir_out, file_name + '_line_' + str(indexer_textlines)) + base_name = os.path.join( + dir_out, file_name + '_line_' + str(indexer_textlines)) if self.pref_of_dataset: base_name += '_' + self.pref_of_dataset if not self.do_not_mask_with_textline_contour: @@ -6074,25 +6089,31 @@ class Eynollah_ocr: preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character - masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped = \ + np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 preds_max = np.max(preds, 
axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) masked_means[np.isnan(masked_means)] = 0 masked_means_ver = masked_means[indices_ver] #print(masked_means_ver, 'pred_max_not_unk') - indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + indices_where_flipped_conf_value_is_higher = \ + np.where(masked_means_flipped > masked_means_ver)[0] #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') if len(indices_where_flipped_conf_value_is_higher)>0: indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] - preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] + preds[indices_to_be_replaced,:,:] = \ + preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] if dir_in_bin is not None: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) @@ -6101,35 +6122,42 @@ class Eynollah_ocr: preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character - masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped = \ + np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = 
np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) masked_means[np.isnan(masked_means)] = 0 masked_means_ver = masked_means[indices_ver] #print(masked_means_ver, 'pred_max_not_unk') - indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + indices_where_flipped_conf_value_is_higher = \ + np.where(masked_means_flipped > masked_means_ver)[0] #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') if len(indices_where_flipped_conf_value_is_higher)>0: indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] - preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] + preds_bin[indices_to_be_replaced,:,:] = \ + preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] preds = (preds + preds_bin) / 2. 
- pred_texts = decode_batch_predictions(preds, self.num_to_char) preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].replace("[UNK]", "") @@ -6144,31 +6172,40 @@ class Eynollah_ocr: del cropped_lines_bin gc.collect() - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] + if cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] - extracted_conf_value_merged = [extracted_conf_value[ind] if cropped_lines_meging_indexing[ind]==0 else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2. if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_conf_value_merged = [extracted_conf_value[ind] + if cropped_lines_meging_indexing[ind]==0 + else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2. 
+ if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] - extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm] for ind_cfm in range(len(extracted_texts_merged)) if extracted_texts_merged[ind_cfm] is not None] + extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm] + for ind_cfm in range(len(extracted_texts_merged)) + if extracted_texts_merged[ind_cfm] is not None] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - if dir_out_image_text: - #font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! font = importlib_resources.files(__package__) / "Charis-Regular.ttf" with importlib_resources.as_file(font) as font: font = ImageFont.truetype(font=font, size=40) for indexer_text, bb_ind in enumerate(total_bb_coordinates): - - x_bb = bb_ind[0] y_bb = bb_ind[1] w_bb = bb_ind[2] h_bb = bb_ind[3] - font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) ) + font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], + font.path, w_bb, int(h_bb*0.4) ) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) @@ -6185,24 +6222,25 @@ class Eynollah_ocr: text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + ind = np.array(cropped_lines_region_indexer)==ind + extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] if len(extracted_texts_merged_un)>1: text_by_textregion_ind = "" next_glue = "" for indt in range(len(extracted_texts_merged_un)): - if extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-') or extracted_texts_merged_un[indt].endswith('¬'): - text_by_textregion_ind = text_by_textregion_ind + next_glue + 
extracted_texts_merged_un[indt][:-1] + if (extracted_texts_merged_un[indt].endswith('⸗') or + extracted_texts_merged_un[indt].endswith('-') or + extracted_texts_merged_un[indt].endswith('¬')): + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1] next_glue = "" else: - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt] + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt] next_glue = " " text_by_textregion.append(text_by_textregion_ind) - else: text_by_textregion.append(" ".join(extracted_texts_merged_un)) #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion') - - + ###index_tot_regions = [] ###tot_region_ref = [] @@ -6251,7 +6289,8 @@ class Eynollah_ocr: if childtest3.tag.endswith("TextEquiv"): for child_uc in childtest3: if child_uc.tag.endswith("Unicode"): - childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}") + childtest3.set('conf', + f"{extracted_conf_value_merged[indexer]:.2f}") child_uc.text = extracted_texts_merged[indexer] indexer = indexer + 1 @@ -6267,7 +6306,9 @@ class Eynollah_ocr: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order] + ###sample_order = [(id_to_order[tid], text) + ### for tid, text in zip(id_textregions, textregions_by_existing_ids) + ### if tid in id_to_order] ##ordered_texts_sample = [text for _, text in sorted(sample_order)] ##tot_page_text = ' '.join(ordered_texts_sample) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 6eeabd0..52bf3ef 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1,3 +1,5 @@ +from typing import Tuple +from logging import getLogger import time import math @@ -298,9 +300,17 @@ def 
return_x_start_end_mothers_childs_and_type_of_reading_order( x_end_with_child_without_mother, new_main_sep_y) +def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]: + return (box[1], box[1] + box[3], + box[0], box[0] + box[2]) + +def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]: + return (slice(box[1], box[1] + box[3]), + slice(box[0], box[0] + box[2])) + def crop_image_inside_box(box, img_org_copy): - image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] - return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] + image_box = img_org_copy[box2slice(box)] + return image_box, box2rect(box) def otsu_copy_binary(img): img_r = np.zeros((img.shape[0], img.shape[1], 3)) @@ -373,6 +383,10 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8): return np.std(z) def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8): + if not regions_without_separators.any(): + return 0, [] + #plt.imshow(regions_without_separators) + #plt.show() regions_without_separators_0 = regions_without_separators.sum(axis=0) ##plt.plot(regions_without_separators_0) ##plt.show() @@ -392,6 +406,9 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl zneg = gaussian_filter1d(zneg, sigma_) peaks_neg, _ = find_peaks(zneg, height=0) + #plt.plot(zneg) + #plt.plot(peaks_neg, zneg[peaks_neg], 'rx') + #plt.show() peaks, _ = find_peaks(z, height=0) peaks_neg = peaks_neg - 10 - 10 @@ -406,9 +423,13 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl (peaks_neg < (regions_without_separators.shape[1] - 370))] interest_pos = z[peaks] interest_pos = interest_pos[interest_pos > 10] + if not interest_pos.any(): + return 0, [] # plt.plot(z) # plt.show() interest_neg = z[peaks_neg] + if not interest_neg.any(): + return 0, [] min_peaks_pos = np.min(interest_pos) max_peaks_pos = np.max(interest_pos) @@ -955,11 +976,11 @@ def 
check_any_text_region_in_model_one_is_main_or_header_light( regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom, regions_model_full.shape[0] // zoom), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent] + contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent] ### cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ - find_new_features_of_contours(contours_only_text_parent) + find_new_features_of_contours(contours_only_text_parent_z) length_con=x_max_main-x_min_main height_con=y_max_main-y_min_main @@ -982,8 +1003,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light( contours_only_text_parent_main_d=[] contours_only_text_parent_head_d=[] - for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] + for ii, con in enumerate(contours_only_text_parent_z): img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) @@ -992,25 +1012,30 @@ def check_any_text_region_in_model_one_is_main_or_header_light( (regions_model_full[:,:,0]==2)).sum() pixels_main = all_pixels - pixels_header - if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 - contours_only_text_parent_head.append(con) + if (( pixels_header / float(pixels_main) >= 0.6 and + length_con[ii] / float(height_con[ii]) >= 1.3 and + length_con[ii] / float(height_con[ii]) <= 3 ) or + ( pixels_header / float(pixels_main) >= 0.3 and + length_con[ii] / float(height_con[ii]) >=3 )): + + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 2 + 
contours_only_text_parent_head.append(contours_only_text_parent[ii]) + conf_contours_head.append(None) # why not conf_contours[ii], too? if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_head.append(all_box_coord[ii]) slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) - conf_contours_head.append(None) + else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 - contours_only_text_parent_main.append(con) + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 1 + contours_only_text_parent_main.append(contours_only_text_parent[ii]) conf_contours_main.append(conf_contours[ii]) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) - #print(all_pixels,pixels_main,pixels_header) ### to make it faster @@ -1018,8 +1043,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light( # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom, # regions_model_full.shape[0] // zoom), # interpolation=cv2.INTER_NEAREST) - contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head] - contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main] ### return (regions_model_1, @@ -1626,12 +1649,19 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, def return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, - num_col_classifier, erosion_hurts, tables, right2left_readingorder): + num_col_classifier, erosion_hurts, tables, + right2left_readingorder, + logger=None): if right2left_readingorder: 
regions_without_separators = cv2.flip(regions_without_separators,1) + if logger is None: + logger = getLogger(__package__) + logger.debug('enter return_boxes_of_images_by_order_of_reading_new') + boxes=[] peaks_neg_tot_tables = [] + splitter_y_new = np.array(splitter_y_new, dtype=int) for i in range(len(splitter_y_new)-1): #print(splitter_y_new[i],splitter_y_new[i+1]) matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) & @@ -1644,24 +1674,19 @@ def return_boxes_of_images_by_order_of_reading_new( # 0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))): if True: try: - if erosion_hurts: - num_col, peaks_neg_fin = find_num_col( - regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], - num_col_classifier, tables, multiplier=6.) - else: - num_col, peaks_neg_fin = find_num_col( - regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], - num_col_classifier, tables, multiplier=7.) + num_col, peaks_neg_fin = find_num_col( + regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :], + num_col_classifier, tables, multiplier=6. if erosion_hurts else 7.) 
except: peaks_neg_fin=[] num_col = 0 try: - peaks_neg_fin_org=np.copy(peaks_neg_fin) if (len(peaks_neg_fin)+1)=2 or there_is_sep_with_child==1))): try: - y_grenze=int(splitter_y_new[i])+300 + y_grenze = splitter_y_new[i] + 300 #check if there is a big separator in this y_mains_sep_ohne_grenzen args_early_ys=np.arange(len(y_type_2)) #print(args_early_ys,'args_early_ys') - #print(int(splitter_y_new[i]),int(splitter_y_new[i+1])) + #print(splitter_y_new[i], splitter_y_new[i+1]) - x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) & + x_starting_up = x_starting[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) & + x_ending_up = x_ending[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) & + y_type_2_up = y_type_2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + y_diff_type_2_up = y_diff_type_2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) & + args_up = args_early_ys[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] if len(y_type_2_up) > 0: y_main_separator_up = y_type_2_up [(x_starting_up==0) & @@ -1776,8 +1798,8 @@ def return_boxes_of_images_by_order_of_reading_new( args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) )) #print(args_to_be_kept,'args_to_be_kept') boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], - int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))]) - splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0] + splitter_y_new[i], y_diff_main_separator_up.max()]) + splitter_y_new[i] = y_diff_main_separator_up.max() #print(splitter_y_new[i],'splitter_y_new[i]') y_type_2 = y_type_2[args_to_be_kept] @@ -1786,29 +1808,28 @@ def return_boxes_of_images_by_order_of_reading_new( y_diff_type_2 = 
y_diff_type_2[args_to_be_kept] #print('galdiha') - y_grenze=int(splitter_y_new[i])+200 + y_grenze = splitter_y_new[i] + 200 args_early_ys2=np.arange(len(y_type_2)) - y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) & + y_type_2_up=y_type_2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) & + x_starting_up=x_starting[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) & + x_ending_up=x_ending[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + y_diff_type_2_up=y_diff_type_2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] - args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) & + args_up2=args_early_ys2[(y_type_2 > splitter_y_new[i]) & (y_type_2 <= y_grenze)] #print(y_type_2_up,x_starting_up,x_ending_up,'didid') - nodes_in = [] + nodes_in = set() for ij in range(len(x_starting_up)): - nodes_in = nodes_in + list(range(int(x_starting_up[ij]), - int(x_ending_up[ij]))) - nodes_in = np.unique(nodes_in) + nodes_in.update(range(x_starting_up[ij], + x_ending_up[ij])) #print(nodes_in,'nodes_in') - if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): + if nodes_in == set(range(len(peaks_neg_tot)-1)): pass - elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)): + elif nodes_in == set(range(1, len(peaks_neg_tot)-1)): pass else: #print('burdaydikh') @@ -1823,17 +1844,16 @@ def return_boxes_of_images_by_order_of_reading_new( pass #print('burdaydikh2') elif len(y_diff_main_separator_up)==0: - nodes_in = [] + nodes_in = set() for ij in range(len(x_starting_up)): - nodes_in = nodes_in + list(range(int(x_starting_up[ij]), - int(x_ending_up[ij]))) - nodes_in = np.unique(nodes_in) + nodes_in.update(range(x_starting_up[ij], + x_ending_up[ij])) #print(nodes_in,'nodes_in2') 
#print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))') - if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): + if nodes_in == set(range(len(peaks_neg_tot)-1)): pass - elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)): + elif nodes_in == set(range(1,len(peaks_neg_tot)-1)): pass else: #print('burdaydikh') @@ -1858,26 +1878,25 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_by_order=[] if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1: if reading_order_type==1: - y_lines_by_order.append(int(splitter_y_new[i])) + y_lines_by_order.append(splitter_y_new[i]) x_start_by_order.append(0) x_end_by_order.append(len(peaks_neg_tot)-2) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(int(x_start_without_mother[dj]), - int(x_end_without_mother[dj]))) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(len(peaks_neg_tot)-1) - columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) - ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + columns_covered_by_mothers.update( + range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_not_covered = list(all_columns - columns_covered_by_mothers) + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + + len(x_start_without_mother), + dtype=int) * splitter_y_new[i]) + ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = 
np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, np.array(columns_not_covered, int)) x_starting = np.append(x_starting, x_start_without_mother) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1) x_ending = np.append(x_ending, x_end_without_mother) ind_args=np.arange(len(y_type_2)) @@ -1906,42 +1925,39 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_by_order.append(x_end_column_sort[ii]-1) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(int(x_start_without_mother[dj]), - int(x_end_without_mother[dj]))) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(len(peaks_neg_tot)-1) - columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) - ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + columns_covered_by_mothers.update( + range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_not_covered = list(all_columns - columns_covered_by_mothers) + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother), + dtype=int) * splitter_y_new[i]) + ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, np.array(columns_not_covered, int)) x_starting = np.append(x_starting, x_start_without_mother) - x_ending = np.append(x_ending, 
np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1) x_ending = np.append(x_ending, x_end_without_mother) - columns_covered_by_with_child_no_mothers = [] + columns_covered_by_with_child_no_mothers = set() for dj in range(len(x_end_with_child_without_mother)): - columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ - list(range(int(x_start_with_child_without_mother[dj]), - int(x_end_with_child_without_mother[dj]))) - columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) - - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers)) + columns_covered_by_with_child_no_mothers.update( + range(x_start_with_child_without_mother[dj], + x_end_with_child_without_mother[dj])) + columns_not_covered_child_no_mother = list( + all_columns - columns_covered_by_with_child_no_mothers) #indexes_to_be_spanned=[] for i_s in range(len(x_end_with_child_without_mother)): columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s]) columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother) ind_args = np.arange(len(y_type_2)) - x_end_with_child_without_mother = np.array(x_end_with_child_without_mother) - x_start_with_child_without_mother = np.array(x_start_with_child_without_mother) + x_end_with_child_without_mother = np.array(x_end_with_child_without_mother, int) + x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int) for i_s_nc in columns_not_covered_child_no_mother: if i_s_nc in x_start_with_child_without_mother: - x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0] + x_end_biggest_column = \ + x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0] args_all_biggest_lines = ind_args[(x_starting==i_s_nc) & 
(x_ending==x_end_biggest_column)] y_column_nc = y_type_2[args_all_biggest_lines] @@ -1951,7 +1967,7 @@ def return_boxes_of_images_by_order_of_reading_new( for i_c in range(len(y_column_nc)): if i_c==(len(y_column_nc)-1): ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & - (y_type_2=i_s_nc) & (x_ending<=x_end_biggest_column)] else: @@ -1967,21 +1983,19 @@ def return_boxes_of_images_by_order_of_reading_new( if len(x_diff_all_between_nm_wc)>0: biggest=np.argmax(x_diff_all_between_nm_wc) - columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_starting_all_between_nm_wc)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(int(x_starting_all_between_nm_wc[dj]), - int(x_ending_all_between_nm_wc[dj]))) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(i_s_nc, x_end_biggest_column) - columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) + columns_covered_by_mothers.update( + range(x_starting_all_between_nm_wc[dj], + x_ending_all_between_nm_wc[dj])) + child_columns = set(range(i_s_nc, x_end_biggest_column)) + columns_not_covered = list(child_columns - columns_covered_by_mothers) should_longest_line_be_extended=0 if (len(x_diff_all_between_nm_wc) > 0 and - set(list(range(int(x_starting_all_between_nm_wc[biggest]), - int(x_ending_all_between_nm_wc[biggest]))) + - list(columns_not_covered)) != set(all_columns)): + set(list(range(x_starting_all_between_nm_wc[biggest], + x_ending_all_between_nm_wc[biggest])) + + list(columns_not_covered)) != child_columns): should_longest_line_be_extended=1 index_lines_so_close_to_top_separator = \ np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) & @@ -1991,9 +2005,12 @@ def return_boxes_of_images_by_order_of_reading_new( np.array(list(set(list(range(len(y_all_between_nm_wc)))) - set(list(index_lines_so_close_to_top_separator)))) if 
len(indexes_remained_after_deleting_closed_lines) > 0: - y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + y_all_between_nm_wc = \ + y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_starting_all_between_nm_wc = \ + x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_ending_all_between_nm_wc = \ + x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc) @@ -2005,11 +2022,11 @@ def return_boxes_of_images_by_order_of_reading_new( x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest]) x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest]) except: - pass + logger.exception("cannot append") y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered)) - x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered) - x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int)) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1) ind_args_between=np.arange(len(x_ending_all_between_nm_wc)) for column in range(int(i_s_nc), int(x_end_biggest_column)): @@ -2078,52 +2095,50 @@ def return_boxes_of_images_by_order_of_reading_new( if len(y_in_cols)>0: y_down=np.min(y_in_cols) else: - y_down=[int(splitter_y_new[i+1])][0] + y_down=splitter_y_new[i+1] 
#print(y_itself,'y_itself') boxes.append([peaks_neg_tot[column], peaks_neg_tot[column+1], y_itself, y_down]) except: + logger.exception("cannot assign boxes") boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], - int(splitter_y_new[i]), int(splitter_y_new[i+1])]) + splitter_y_new[i], splitter_y_new[i+1]]) else: y_lines_by_order=[] x_start_by_order=[] x_end_by_order=[] if len(x_starting)>0: - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_covered_by_lines_covered_more_than_2col = [] + columns_covered_by_lines_covered_more_than_2col = set() for dj in range(len(x_starting)): - if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns): - pass - else: - columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \ - list(range(int(x_starting[dj]),int(x_ending[dj]) )) - columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col)) - columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col)) + if set(range(x_starting[dj], x_ending[dj])) != all_columns: + columns_covered_by_lines_covered_more_than_2col.update( + range(x_starting[dj], x_ending[dj])) + columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) - ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + 1, + dtype=int) * splitter_y_new[i]) + ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_starting = np.append(x_starting, np.array(columns_not_covered, 
x_starting.dtype)) + x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) if len(new_main_sep_y) > 0: x_starting = np.append(x_starting, 0) - x_ending = np.append(x_ending, len(peaks_neg_tot)-1) + x_ending = np.append(x_ending, len(peaks_neg_tot) - 1) else: x_starting = np.append(x_starting, x_starting[0]) x_ending = np.append(x_ending, x_ending[0]) else: - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_not_covered = list(set(all_columns)) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) - ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + columns_not_covered = list(all_columns) + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered), + dtype=int) * splitter_y_new[i]) + ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) + x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) - ind_args=np.array(range(len(y_type_2))) + ind_args = np.arange(len(y_type_2)) for column in range(len(peaks_neg_tot)-1): #print(column,'column') @@ -2155,7 +2170,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_start_itself=x_start_copy.pop(il) x_end_itself=x_end_copy.pop(il) - for column in range(int(x_start_itself), int(x_end_itself)+1): + for column in range(x_start_itself, x_end_itself+1): #print(column,'cols') y_in_cols=[] for yic in range(len(y_copy)): @@ -2169,7 +2184,7 @@ def return_boxes_of_images_by_order_of_reading_new( if len(y_in_cols)>0: y_down=np.min(y_in_cols) else: - y_down=[int(splitter_y_new[i+1])][0] + y_down=splitter_y_new[i+1] #print(y_itself,'y_itself') 
boxes.append([peaks_neg_tot[column], peaks_neg_tot[column+1], @@ -2191,9 +2206,10 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_new = regions_without_separators.shape[1] - boxes[i][0] boxes[i][0] = x_start_new boxes[i][1] = x_end_new - return boxes, peaks_neg_tot_tables_new - else: - return boxes, peaks_neg_tot_tables + peaks_neg_tot_tables = peaks_neg_tot_tables_new + + logger.debug('exit return_boxes_of_images_by_order_of_reading_new') + return boxes, peaks_neg_tot_tables def is_image_filename(fname: str) -> bool: return fname.lower().endswith(('.jpg', diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 0e84153..0700ed4 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,7 +1,15 @@ +from typing import Sequence, Union +from numbers import Number from functools import partial +import itertools + import cv2 import numpy as np -from shapely import geometry +from scipy.sparse.csgraph import minimum_spanning_tree +from shapely.geometry import Polygon, LineString +from shapely.geometry.polygon import orient +from shapely import set_precision +from shapely.ops import unary_union, nearest_points from .rotate import rotate_image, rotation_image_new @@ -37,29 +45,28 @@ def get_text_region_boxes_by_given_contours(contours): return boxes, contours_new -def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): +def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): found_polygons_early = [] - for jv,c in enumerate(contours): - if len(c) < 3: # A polygon cannot have less than 3 points + for jv, contour in enumerate(contours): + if len(contour) < 3: # A polygon cannot have less than 3 points continue - polygon = geometry.Polygon([point[0] for point in c]) + polygon = contour2polygon(contour, dilate=dilate) area = polygon.area if (area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and 
hierarchy[0][jv][3] == -1): - found_polygons_early.append(np.array([[point] - for point in polygon.exterior.coords], dtype=np.uint)) + found_polygons_early.append(polygon2contour(polygon)) return found_polygons_early -def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): +def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): found_polygons_early = [] - for jv,c in enumerate(contours): - if len(c) < 3: # A polygon cannot have less than 3 points + for jv, contour in enumerate(contours): + if len(contour) < 3: # A polygon cannot have less than 3 points continue - polygon = geometry.Polygon([point[0] for point in c]) - # area = cv2.contourArea(c) + polygon = contour2polygon(contour, dilate=dilate) + # area = cv2.contourArea(contour) area = polygon.area ##print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area @@ -68,9 +75,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m area <= max_area * np.prod(image.shape[:2]) and # hierarchy[0][jv][3]==-1 True): - # print(c[0][0][1]) - found_polygons_early.append(np.array([[point] - for point in polygon.exterior.coords], dtype=np.int32)) + # print(contour[0][0][1]) + found_polygons_early.append(polygon2contour(polygon)) return found_polygons_early def find_new_features_of_contours(contours_main): @@ -135,12 +141,12 @@ def return_parent_contours(contours, hierarchy): if hierarchy[0][i][3] == -1] return contours_parent -def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): +def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = 
cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -247,30 +253,26 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) return cont_int[0], index_r_con, confidence_contour -def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map): +def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix): if not len(cnts): return [], [] - - confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) - img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) - ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) - #cnts = cnts/2 - cnts = [(i/6).astype(int) for i in cnts] - results = map(partial(do_back_rotation_and_get_cnt_back, - img=img, - slope_first=slope_first, - confidence_matrix=confidence_matrix, - ), - cnts, range(len(cnts))) - contours, indexes, conf_contours = tuple(zip(*results)) - return [i*6 for i in contours], list(conf_contours) -def return_contours_of_interested_textline(region_pre_p, pixel): + confidence_matrix = cv2.resize(confidence_matrix, + (img.shape[1] // 6, img.shape[0] // 6), + interpolation=cv2.INTER_NEAREST) + confs = [] + for cnt in cnts: + cnt_mask = np.zeros(confidence_matrix.shape) + cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0) + confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) + return cnts, confs + +def return_contours_of_interested_textline(region_pre_p, label): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] 
== label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -293,12 +295,12 @@ def return_contours_of_image(image): contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return contours, hierarchy -def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): +def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -311,12 +313,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si return contours_imgs -def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): +def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -332,3 +334,97 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, return img_ret[:, :, 0] +def dilate_textline_contours(all_found_textline_polygons): + return [[polygon2contour(contour2polygon(contour, dilate=6)) + for 
contour in region] + for region in all_found_textline_polygons] + +def dilate_textregion_contours(all_found_textline_polygons): + return [polygon2contour(contour2polygon(contour, dilate=6)) + for contour in all_found_textline_polygons] + +def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0): + polygon = Polygon([point[0] for point in contour]) + if dilate: + polygon = polygon.buffer(dilate) + if polygon.geom_type == 'GeometryCollection': + # heterogeneous result: filter zero-area shapes (LineString, Point) + polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0]) + if polygon.geom_type == 'MultiPolygon': + # homogeneous result: construct convex hull to connect + polygon = join_polygons(polygon.geoms) + return make_valid(polygon) + +def polygon2contour(polygon: Polygon) -> np.ndarray: + polygon = np.array(polygon.exterior.coords[:-1], dtype=int) + return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis] + +def make_valid(polygon: Polygon) -> Polygon: + """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" + def isint(x): + return isinstance(x, int) or int(x) == x + # make sure rounding does not invalidate + if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0: + polygon = Polygon(np.round(polygon.exterior.coords)) + points = list(polygon.exterior.coords[:-1]) + # try by re-arranging points + for split in range(1, len(points)): + if polygon.is_valid or polygon.simplify(polygon.area).is_valid: + break + # simplification may not be possible (at all) due to ordering + # in that case, try another starting point + polygon = Polygon(points[-split:]+points[:-split]) + # try by simplification + for tolerance in range(int(polygon.area + 1.5)): + if polygon.is_valid: + break + # simplification may require a larger tolerance + polygon = polygon.simplify(tolerance + 1) + # try by enlarging + for tolerance in range(1, 
int(polygon.area + 2.5)): + if polygon.is_valid: + break + # enlargement may require a larger tolerance + polygon = polygon.buffer(tolerance) + assert polygon.is_valid, polygon.wkt + return polygon + +def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon: + """construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points""" + # ensure input polygons are simply typed and all oriented equally + polygons = [orient(poly) + for poly in itertools.chain.from_iterable( + [poly.geoms + if poly.geom_type in ['MultiPolygon', 'GeometryCollection'] + else [poly] + for poly in polygons])] + npoly = len(polygons) + if npoly == 1: + return polygons[0] + # find min-dist path through all polygons (travelling salesman) + pairs = itertools.combinations(range(npoly), 2) + dists = np.zeros((npoly, npoly), dtype=float) + for i, j in pairs: + dist = polygons[i].distance(polygons[j]) + if dist < 1e-5: + dist = 1e-5 # if pair merely touches, we still need to get an edge + dists[i, j] = dist + dists[j, i] = dist + dists = minimum_spanning_tree(dists, overwrite=True) + # add bridge polygons (where necessary) + for prevp, nextp in zip(*dists.nonzero()): + prevp = polygons[prevp] + nextp = polygons[nextp] + nearest = nearest_points(prevp, nextp) + bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1) + polygons.append(bridgep) + jointp = unary_union(polygons) + assert jointp.geom_type == 'Polygon', jointp.wkt + # follow-up calculations will necessarily be integer; + # so anticipate rounding here and then ensure validity + jointp2 = set_precision(jointp, 1.0) + if jointp2.geom_type != 'Polygon' or not jointp2.is_valid: + jointp2 = Polygon(np.round(jointp.exterior.coords)) + jointp2 = make_valid(jointp2) + assert jointp2.geom_type == 'Polygon', jointp2.wkt + return jointp2 diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index ac8dc1d..595cd14 100644 --- a/src/eynollah/utils/marginals.py 
+++ b/src/eynollah/utils/marginals.py @@ -99,6 +99,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve except: point_left=first_nonzero + if point_left == first_nonzero and point_right == last_nonzero: + return text_regions if point_right>=mask_marginals.shape[1]: diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index ead5cfb..d41dda1 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -17,9 +17,12 @@ from .contour import ( return_contours_of_interested_textline, find_contours_mean_y_diff, ) +from .shm import share_ndarray, wrap_ndarray_shared from . import ( find_num_col_deskew, crop_image_inside_box, + box2rect, + box2slice, ) def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): @@ -64,7 +67,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -75,11 +79,14 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : - arg_diff_cluster[i + 1] + 1]) - 
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -176,7 +183,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -236,7 +244,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): try: neg_peaks_max=np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -313,23 +322,36 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) 
###-int(dis_to_next_down*1./4.0) + point_down =y_max_cont-1 + ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down =y_max_cont-1 + ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) point_down_narrow = peaks[jj] + first_nonzero + int( - 1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + 1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) else: dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: - point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) point_down_narrow = peaks[jj] + first_nonzero + int( 1.1 * dis_to_next_down) 
###-int(dis_to_next_down*1./2) @@ -338,7 +360,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down_narrow = img_patch.shape[0] - 2 - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -465,7 +489,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -540,7 +565,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down = peaks[jj] + first_nonzero + int(1. 
/ 1.9 * dis_to_next_down) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -610,7 +636,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -686,30 +713,50 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: - point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = x_max_cont - 1 + ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + 
first_nonzero - int(1.4 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = x_max_cont - 1 + ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) + #point_up + # np.max(y_cont) + #peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) else: dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: - point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) if point_down_narrow >= img_patch.shape[0]: point_down_narrow = img_patch.shape[0] - 2 - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] + distances = 
[cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] @@ -798,7 +845,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -863,7 +911,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -947,7 +996,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -960,8 +1010,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): if len(arg_diff_cluster) > 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - 
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: + arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -1011,7 +1064,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): try: neg_peaks_max = np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -1287,7 +1341,9 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i return None, cont_final -def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): +def textline_contours_postprocessing(textline_mask, slope, + contour_text_interest, box_ind, + add_boxes_coor_into_textlines=False): textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = textline_mask.astype(np.uint8) kernel = np.ones((5, 5), np.uint8) @@ -1347,24 +1403,26 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest return contours_rotated_clean -def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): +def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None): if logger is None: logger = getLogger(__package__) + if not 
np.prod(img_crop.shape): + return img_crop if num_col == 1: - num_patches = int(img_path.shape[1] / 200.0) + num_patches = int(img_crop.shape[1] / 200.0) else: - num_patches = int(img_path.shape[1] / 140.0) - # num_patches=int(img_path.shape[1]/200.) + num_patches = int(img_crop.shape[1] / 140.0) + # num_patches=int(img_crop.shape[1]/200.) if num_patches == 0: num_patches = 1 - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] + img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] - # plt.imshow(img_patch_ineterst) + # plt.imshow(img_patch_interest) # plt.show() - length_x = int(img_path.shape[1] / float(num_patches)) + length_x = int(img_crop.shape[1] / float(num_patches)) # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2 margin = int(0.04 * length_x) # if margin<=4: @@ -1372,7 +1430,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # margin=0 width_mid = length_x - 2 * margin - nxf = img_path.shape[1] / float(width_mid) + nxf = img_crop.shape[1] / float(width_mid) if nxf > int(nxf): nxf = int(nxf) + 1 @@ -1388,12 +1446,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl index_x_d = i * width_mid index_x_u = index_x_d + length_x - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x + if index_x_u > img_crop.shape[1]: + index_x_u = img_crop.shape[1] + index_x_d = img_crop.shape[1] - length_x # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] + img_xline = img_patch_interest[:, index_x_d:index_x_u] try: assert img_xline.any() @@ -1409,9 +1467,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_line_rotated = rotate_image(img_xline, slope_xline) img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - 
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] + img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] - img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) + img_patch_interest_revised = np.zeros(img_patch_interest.shape) for i in range(nxf): if i == 0: @@ -1421,11 +1479,11 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl index_x_d = i * width_mid index_x_u = index_x_d + length_x - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x + if index_x_u > img_crop.shape[1]: + index_x_u = img_crop.shape[1] + index_x_d = img_crop.shape[1] - length_x - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] + img_xline = img_patch_interest[:, index_x_d:index_x_u] img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] @@ -1448,11 +1506,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] - img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size + img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size - return img_patch_ineterst_revised + return img_patch_interest_revised -def do_image_rotation(angle, img, sigma_des, logger=None): +@wrap_ndarray_shared(kw='img') +def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None): if logger is None: logger = getLogger(__package__) img_rot = rotate_image(img, angle) @@ -1465,7 +1524,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None): return var def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, - main_page=False, 
logger=None, plotter=None, map=map): + main_page=False, logger=None, plotter=None, map=None): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1479,159 +1538,75 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) - #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] + #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0], + # int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: angles = np.array([-45, 0, 45, 90,]) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) elif main_page: - angles = np.array (list(np.linspace(-12, -7, int(n_tot_angles/4))) + list(np.linspace(-6, 6, n_tot_angles- 2* int(n_tot_angles/4))) + list(np.linspace(7, 12, int(n_tot_angles/4))))#np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + #angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) + angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4), + np.linspace(-6, 6, n_tot_angles // 2), + 
np.linspace(7, 12, n_tot_angles // 4))) + angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) early_slope_edge=11 if abs(angle) > early_slope_edge: if angle < 0: - angles = np.linspace(-90, -12, n_tot_angles) + angles2 = np.linspace(-90, -12, n_tot_angles) else: - angles = np.linspace(90, 12, n_tot_angles) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + angles2 = np.linspace(90, 12, n_tot_angles) + angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter) + if var2 > var: + angle = angle2 else: angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) + angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) early_slope_edge=22 if abs(angle) > early_slope_edge: if angle < 0: - angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) + angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) else: - angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) - + angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) + angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter) + if var2 > var: + angle = angle2 return angle def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): if logger is None: logger = getLogger(__package__) - results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles)) + if map is None: + results = [do_image_rotation.__wrapped__(angle, img=img, sigma_des=sigma_des, logger=logger) + for angle in angles] + else: + with share_ndarray(img) as img_shared: + results = list(map(partial(do_image_rotation, img=img_shared, 
sigma_des=sigma_des, logger=None), + angles)) if plotter: plotter.save_plot_of_rotation_angle(angles, results) try: var_res = np.array(results) assert var_res.any() - angle = angles[np.argmax(var_res)] + idx = np.argmax(var_res) + angle = angles[idx] + var = var_res[idx] except: logger.exception("cannot determine best angle among %s", str(angles)) angle = 0 - return angle - - -def return_deskew_slop_old_mp(img_patch_org, sigma_des,n_tot_angles=100, - main_page=False, logger=None, plotter=None): - if main_page and plotter: - plotter.save_plot_of_textline_density(img_patch_org) - - img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) - img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] - - max_shape=np.max(img_int.shape) - img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) )) - - onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.) - onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) - - img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] - - if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: - angles = np.array([-45, 0, 45, 90,]) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - - angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - elif main_page: - angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - - early_slope_edge=11 - if abs(angle) > early_slope_edge: - if angle < 0: - angles = np.linspace(-90, -12, n_tot_angles) - else: - angles = np.linspace(90, 12, n_tot_angles) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - else: - angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - - early_slope_edge=22 - if abs(angle) > 
early_slope_edge: - if angle < 0: - angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) - else: - angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) - - return angle - -def do_image_rotation_omp(queue_of_all_params,angles_per_process, img_resized, sigma_des): - vars_per_each_subprocess = [] - angles_per_each_subprocess = [] - for mv in range(len(angles_per_process)): - img_rot=rotate_image(img_resized,angles_per_process[mv]) - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - vars_per_each_subprocess.append(var_spectrum) - angles_per_each_subprocess.append(angles_per_process[mv]) - - queue_of_all_params.put([vars_per_each_subprocess, angles_per_each_subprocess]) - -def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None): - num_cores = cpu_count() - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angles), num_cores + 1) - - for i in range(num_cores): - angles_per_process = angles[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation_omp, args=(queue_of_all_params, angles_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - all_angles = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - vars_for_subprocess = list_all_par[0] - angles_sub_process = list_all_par[1] - for j in range(len(vars_for_subprocess)): - var_res.append(vars_for_subprocess[j]) - all_angles.append(angles_sub_process[j]) - - for i in range(num_cores): - processes[i].join() - - if plotter: - plotter.save_plot_of_rotation_angle(all_angles, var_res) - - - try: - var_res=np.array(var_res) - ang_int=all_angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - return ang_int + var = 0 + return angle, var 
+@wrap_ndarray_shared(kw='textline_mask_tot_ea') def do_work_of_slopes_new( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, + textline_mask_tot_ea=None, slope_deskew=0.0, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1641,7 +1616,7 @@ def do_work_of_slopes_new( logger.debug('enter do_work_of_slopes_new') x, y, w, h = box_text - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline @@ -1649,7 +1624,7 @@ def do_work_of_slopes_new( img_int_p = all_text_region_raw[:,:] img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) - if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: + if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] @@ -1687,9 +1662,12 @@ def do_work_of_slopes_new( return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope +@wrap_ndarray_shared(kw='textline_mask_tot_ea') +@wrap_ndarray_shared(kw='mask_texts_only') def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, + textline_mask_tot_ea=None, mask_texts_only=None, + num_col=1, scale_par=1.0, slope_deskew=0.0, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1706,7 +1684,7 @@ def do_work_of_slopes_new_curved( # plt.imshow(img_int_p) # plt.show() - if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: + if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew else: @@ -1732,7 +1710,7 @@ def do_work_of_slopes_new_curved( slope_for_all 
= slope_deskew slope = slope_for_all - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) if abs(slope_for_all) < 45: textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) @@ -1765,20 +1743,25 @@ def do_work_of_slopes_new_curved( mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) pixel_img = 1 - mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) + mask_biggest2 = resize_image(mask_biggest2, + int(mask_biggest2.shape[0] * scale_par), + int(mask_biggest2.shape[1] * scale_par)) cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) try: textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) except Exception as why: logger.error(why) else: - textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) + textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, + slope_for_all, contour_par, + box_text, True) return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope +@wrap_ndarray_shared(kw='textline_mask_tot_ea') def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, + textline_mask_tot_ea=None, slope_deskew=0, textline_light=True, logger=None ): if logger is None: @@ -1786,7 +1769,7 @@ def do_work_of_slopes_new_light( logger.debug('enter do_work_of_slopes_new_light') x, y, w, h = box_text - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline diff --git a/src/eynollah/utils/shm.py b/src/eynollah/utils/shm.py new file mode 
100644 index 0000000..4b51053 --- /dev/null +++ b/src/eynollah/utils/shm.py @@ -0,0 +1,45 @@ +from multiprocessing import shared_memory +from contextlib import contextmanager +from functools import wraps +import numpy as np + +@contextmanager +def share_ndarray(array: np.ndarray): + size = np.dtype(array.dtype).itemsize * np.prod(array.shape) + shm = shared_memory.SharedMemory(create=True, size=size) + try: + shared_array = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf) + shared_array[:] = array[:] + shared_array.flags["WRITEABLE"] = False + yield dict(shape=array.shape, dtype=array.dtype, name=shm.name) + finally: + shm.close() + shm.unlink() + +@contextmanager +def ndarray_shared(array: dict): + shm = shared_memory.SharedMemory(name=array['name']) + try: + array = np.ndarray(array['shape'], dtype=array['dtype'], buffer=shm.buf) + yield array + finally: + shm.close() + +def wrap_ndarray_shared(kw=None): + def wrapper(f): + if kw is None: + @wraps(f) + def shared_func(array, *args, **kwargs): + with ndarray_shared(array) as ndarray: + return f(ndarray, *args, **kwargs) + return shared_func + else: + @wraps(f) + def shared_func(*args, **kwargs): + array = kwargs.pop(kw) + with ndarray_shared(array) as ndarray: + kwargs[kw] = ndarray + return f(*args, **kwargs) + return shared_func + return wrapper + diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 4fa99f7..602ad6e 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -92,6 +92,7 @@ def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(t return peaks_final else: return None + # Function to fit text inside the given area def fit_text_single_line(draw, text, font_path, max_width, max_height): initial_font_size = 50 @@ -369,7 +370,11 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind return textline_contour -def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, 
prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False): +def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, + prediction_model, + b_s_ocr, num_to_char, + textline_light=False, + curved_line=False): max_len = 512 padding_token = 299 image_width = 512#max_len * 4 @@ -425,17 +430,23 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) if splited_images: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) else: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) @@ -468,7 +479,12 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr pred_texts_ib = pred_texts[ib].replace("[UNK]", "") extracted_texts.append(pred_texts_ib) - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] + if cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for 
ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 2f9caf3..936c95f 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -289,7 +289,7 @@ class EynollahXmlWriter(): self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): - textregion = TextRegionType(id=counter.next_region_id, type_='header', + textregion = TextRegionType(id=counter.next_region_id, type_='heading', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) @@ -335,7 +335,7 @@ class EynollahXmlWriter(): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) for mm in range(len(polygons_lines_to_be_written_in_xml)): - page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) + page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) for mm in range(len(found_polygons_tables)): page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) diff --git a/tests/test_run.py b/tests/test_run.py index be928a0..59e5099 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -20,23 +20,9 @@ MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_ MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 
'models_ocr_v0_5_0').resolve())) MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve())) -def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog): - infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') - outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' - args = [ - '-m', MODELS_LAYOUT, - '-i', str(infile), - '-o', str(outfile.parent), - # subtests write to same location - '--overwrite', - ] - if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) - caplog.set_level(logging.INFO) - def only_eynollah(logrec): - return logrec.name == 'eynollah' - runner = CliRunner() - for options in [ +@pytest.mark.parametrize( + "options", + [ [], # defaults ["--allow_scaling", "--curved-line"], ["--allow_scaling", "--curved-line", "--full-layout"], @@ -47,22 +33,34 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog): # -eoi ... # --do_ocr # --skip_layout_and_reading_order - ]: - with subtests.test(#msg="test CLI", - options=options): - with caplog.filtering(only_eynollah): - result = runner.invoke(layout_cli, args + options, catch_exceptions=False) - assert result.exit_code == 0, result.stdout - logmsgs = [logrec.message for logrec in caplog.records] - assert str(infile) in logmsgs - assert outfile.exists() - tree = page_from_file(str(outfile)).etree - regions = tree.xpath("//page:TextRegion", namespaces=NS) - assert len(regions) >= 2, "result is inaccurate" - regions = tree.xpath("//page:SeparatorRegion", namespaces=NS) - assert len(regions) >= 2, "result is inaccurate" - lines = tree.xpath("//page:TextLine", namespaces=NS) - assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line + ], ids=str) +def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options): + infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') + outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' + 
args = [ + '-m', MODELS_LAYOUT, + '-i', str(infile), + '-o', str(outfile.parent), + ] + if pytestconfig.getoption('verbose') > 0: + args.extend(['-l', 'DEBUG']) + caplog.set_level(logging.INFO) + def only_eynollah(logrec): + return logrec.name == 'eynollah' + runner = CliRunner() + with caplog.filtering(only_eynollah): + result = runner.invoke(layout_cli, args + options, catch_exceptions=False) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + assert str(infile) in logmsgs + assert outfile.exists() + tree = page_from_file(str(outfile)).etree + regions = tree.xpath("//page:TextRegion", namespaces=NS) + assert len(regions) >= 2, "result is inaccurate" + regions = tree.xpath("//page:SeparatorRegion", namespaces=NS) + assert len(regions) >= 2, "result is inaccurate" + lines = tree.xpath("//page:TextLine", namespaces=NS) + assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') @@ -86,7 +84,13 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in')) assert len(list(outdir.iterdir())) == 2 -def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog): +@pytest.mark.parametrize( + "options", + [ + [], # defaults + ["--no-patches"], + ], ids=str) +def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') args = [ @@ -100,25 +104,19 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca def only_eynollah(logrec): return logrec.name == 'SbbBinarizer' runner = CliRunner() - for options in [ - [], # defaults - ["--no-patches"], - ]: - with subtests.test(#msg="test 
CLI", - options=options): - with caplog.filtering(only_eynollah): - result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) - assert result.exit_code == 0, result.stdout - logmsgs = [logrec.message for logrec in caplog.records] - assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) - assert outfile.exists() - with Image.open(infile) as original_img: - original_size = original_img.size - with Image.open(outfile) as binarized_img: - binarized_size = binarized_img.size - assert original_size == binarized_size + with caplog.filtering(only_eynollah): + result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) + assert outfile.exists() + with Image.open(infile) as original_img: + original_size = original_img.size + with Image.open(outfile) as binarized_img: + binarized_size = binarized_img.size + assert original_size == binarized_size -def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog): +def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ @@ -139,15 +137,19 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 assert len(list(outdir.iterdir())) == 2 -def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog): +@pytest.mark.parametrize( + "options", + [ + [], # defaults + ["-sos"], + ], ids=str) +def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') args = [ '-m', MODELS_LAYOUT, '-i', str(infile), '-o', 
str(outfile.parent), - # subtests write to same location - '--overwrite', ] if pytestconfig.getoption('verbose') > 0: args.extend(['-l', 'DEBUG']) @@ -155,25 +157,19 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap def only_eynollah(logrec): return logrec.name == 'enhancement' runner = CliRunner() - for options in [ - [], # defaults - ["-sos"], - ]: - with subtests.test(#msg="test CLI", - options=options): - with caplog.filtering(only_eynollah): - result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False) - assert result.exit_code == 0, result.stdout - logmsgs = [logrec.message for logrec in caplog.records] - assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs - assert outfile.exists() - with Image.open(infile) as original_img: - original_size = original_img.size - with Image.open(outfile) as enhanced_img: - enhanced_size = enhanced_img.size - assert (original_size == enhanced_size) == ("-sos" in options) + with caplog.filtering(only_eynollah): + result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs + assert outfile.exists() + with Image.open(infile) as original_img: + original_size = original_img.size + with Image.open(outfile) as enhanced_img: + enhanced_size = enhanced_img.size + assert (original_size == enhanced_size) == ("-sos" in options) -def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog): +def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ @@ -194,7 +190,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, ca assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was 
enhanced')]) == 2 assert len(list(outdir.iterdir())) == 2 -def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog): +def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') args = [ @@ -223,7 +219,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplo #assert in_order != out_order assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3'] -def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog): +def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ @@ -245,7 +241,15 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2 assert len(list(outdir.iterdir())) == 2 -def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog): +@pytest.mark.parametrize( + "options", + [ + [], # defaults + ["-doit", #str(outrenderfile.parent)], + ], + ["-trocr"], + ], ids=str) +def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png') @@ -255,8 +259,6 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog): '-i', str(infile), '-dx', str(infile.parent), '-o', str(outfile.parent), - # subtests write to same location - '--overwrite', ] if pytestconfig.getoption('verbose') > 0: args.extend(['-l', 'DEBUG']) @@ -264,33 +266,25 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog): def only_eynollah(logrec): return logrec.name == 'eynollah' runner = CliRunner() - for options in 
[ - # kba Fri Sep 26 12:53:49 CEST 2025 - # Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged - # [], # defaults - # ["-doit", str(outrenderfile.parent)], - ["-trocr"], - ]: - with subtests.test(#msg="test CLI", - options=options): - with caplog.filtering(only_eynollah): - result = runner.invoke(ocr_cli, args + options, catch_exceptions=False) - assert result.exit_code == 0, result.stdout - logmsgs = [logrec.message for logrec in caplog.records] - # FIXME: ocr has no logging! - #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs - assert outfile.exists() - if "-doit" in options: - assert outrenderfile.exists() - #in_tree = page_from_file(str(infile)).etree - #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS) - out_tree = page_from_file(str(outfile)).etree - out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS) - assert len(out_texts) >= 2, ("result is inaccurate", out_texts) - assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts) + if "-doit" in options: + options.insert(options.index("-doit") + 1, str(outrenderfile.parent)) + with caplog.filtering(only_eynollah): + result = runner.invoke(ocr_cli, args + options, catch_exceptions=False) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + # FIXME: ocr has no logging! 
+ #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs + assert outfile.exists() + if "-doit" in options: + assert outrenderfile.exists() + #in_tree = page_from_file(str(infile)).etree + #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS) + out_tree = page_from_file(str(outfile)).etree + out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS) + assert len(out_texts) >= 2, ("result is inaccurate", out_texts) + assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts) -@pytest.mark.skip("Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged") -def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog): +def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [