diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index d2b7057..0000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,51 +0,0 @@ -version: 2 - -jobs: - - build-python37: - machine: - - image: ubuntu-2004:2023.02.1 - - steps: - - checkout - - restore_cache: - keys: - - model-cache - - run: make models - - save_cache: - key: model-cache - paths: - models_eynollah.tar.gz - models_eynollah - - run: - name: "Set Python Version" - command: pyenv install -s 3.7.16 && pyenv global 3.7.16 - - run: make install - - run: make smoke-test - - build-python38: - machine: - - image: ubuntu-2004:2023.02.1 - steps: - - checkout - - restore_cache: - keys: - - model-cache - - run: make models - - save_cache: - key: model-cache - paths: - models_eynollah.tar.gz - models_eynollah - - run: - name: "Set Python Version" - command: pyenv install -s 3.8.16 && pyenv global 3.8.16 - - run: make install - - run: make smoke-test - -workflows: - version: 2 - build: - jobs: - # - build-python37 - - build-python38 diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 30c9729..3a33dcf 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Python package +name: Test on: [push] @@ -14,8 +14,8 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v2 - - uses: actions/cache@v2 + - uses: actions/checkout@v4 + - uses: actions/cache@v4 id: model_cache with: path: models_eynollah @@ -24,7 +24,7 @@ jobs: if: steps.model_cache.outputs.cache-hit != 'true' run: make models - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -34,3 +34,5 @@ jobs: pip install -r requirements-test.txt - name: Test with pytest run: make test + - name: Test docker build + run: make docker diff --git a/CHANGELOG.md b/CHANGELOG.md index da2e1c0..cf6263d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +## [0.3.1] - 2024-08-27 + +Fixed: + + * regression in OCR-D processor, #106 + * Expected Ptr<cv::UMat> for argument 'contour', #110 + * Memory usage explosion with very narrow images (e.g. book spine), #67 + ## [0.3.0] - 2023-05-13 Changed: @@ -117,6 +125,8 @@ Fixed: Initial release +[0.3.1]: ../../compare/v0.3.1...v0.3.0 +[0.3.0]: ../../compare/v0.3.0...v0.2.0 [0.2.0]: ../../compare/v0.2.0...v0.1.0 [0.1.0]: ../../compare/v0.1.0...v0.0.11 [0.0.11]: ../../compare/v0.0.11...v0.0.10 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6780bc2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +ARG DOCKER_BASE_IMAGE +FROM $DOCKER_BASE_IMAGE + +ARG VCS_REF +ARG BUILD_DATE +LABEL \ + maintainer="https://ocr-d.de/kontakt" \ + org.label-schema.vcs-ref=$VCS_REF \ + org.label-schema.vcs-url="https://github.com/qurator-spk/eynollah" \ + org.label-schema.build-date=$BUILD_DATE + +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONIOENCODING=utf8 +ENV XDG_DATA_HOME=/usr/local/share + +WORKDIR /build-eynollah +COPY src/ ./src +COPY pyproject.toml . +COPY requirements.txt . +COPY README.md . +COPY Makefile . 
+RUN apt-get install -y --no-install-recommends g++ +RUN make install + +WORKDIR /data +VOLUME /data diff --git a/Makefile b/Makefile index 440b0bd..a3b7b95 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,11 @@ EYNOLLAH_MODELS ?= $(PWD)/models_eynollah export EYNOLLAH_MODELS +# DOCKER_BASE_IMAGE = artefakt.dev.sbb.berlin:5000/sbb/ocrd_core:v2.68.0 +DOCKER_BASE_IMAGE = docker.io/ocrd/core:v2.68.0 +DOCKER_TAG = ocrd/eynollah + + # BEGIN-EVAL makefile-parser --make-help Makefile help: @@ -22,16 +27,14 @@ help: models: models_eynollah models_eynollah: models_eynollah.tar.gz - # tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' - # tar xf models_eynollah_renamed.tar.gz - # tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' tar xf models_eynollah.tar.gz models_eynollah.tar.gz: # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' - # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' - wget https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz + # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz' + # wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz' + wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz' # Install with pip install: @@ -47,3 +50,12 @@ smoke-test: # Run unit tests test: pytest tests + +# Build docker image +docker: + docker build \ + --build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \ + --build-arg VCS_REF=$$(git rev-parse --short HEAD) \ + --build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \ + -t $(DOCKER_TAG) . 
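For reference, here is a minimal way to exercise the new `docker` target and the resulting image — a sketch assuming the default `ocrd/eynollah` tag from the Makefile and the `/data` volume declared in the Dockerfile; the bind mount is illustrative:

```sh
# Build the image via the new Makefile target (wraps `docker build` with the
# DOCKER_BASE_IMAGE, VCS_REF and BUILD_DATE build args)
make docker

# Smoke-check the image; /data is the declared volume and working directory
docker run --rm -v $PWD:/data ocrd/eynollah eynollah --help
```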
+ diff --git a/README.md b/README.md index 47d81bc..292cfbc 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Eynollah -> Document Layout Analysis (segmentation) using pre-trained models and heuristics +> Document Layout Analysis with Deep Learning and Heuristics [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/) -[![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=shield)](https://circleci.com/gh/qurator-spk/eynollah) [![GH Actions Test](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml/badge.svg)](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml) [![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://opensource.org/license/apache-2-0/) +[![DOI](https://img.shields.io/badge/DOI-10.1145%2F3604951.3605513-red)](https://doi.org/10.1145/3604951.3605513) ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) @@ -14,16 +14,18 @@ * Support for various image optimization operations: * cropping (border detection), binarization, deskewing, dewarping, scaling, enhancing, resizing * Text line segmentation to bounding boxes or polygons (contours) including for curved lines and vertical text -* Detection of reading order +* Detection of reading order (left-to-right or right-to-left) * Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) * [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface +:warning: Development is currently focused on achieving the best possible quality of results for a wide variety of historical documents; processing can therefore be very slow. We aim to improve this, but contributions are welcome. + ## Installation -Python versions `3.8-3.11` with Tensorflow versions >=`2.12` are currently supported. +Python `3.8-3.11` with Tensorflow `2.12-2.15` on Linux are currently supported. For (limited) GPU support the CUDA toolkit needs to be installed. -You can either install via +You can either install from PyPI ``` pip install eynollah ``` or clone the repository, enter it and install (editable) with ``` git clone git@github.com:qurator-spk/eynollah.git cd eynollah; pip install -e . ``` Alternatively, you can run `make install` or `make install-dev` for editable installation. ## Models -Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/). +Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/) or [Hugging Face](https://huggingface.co/SBB?search_models=eynollah). -In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation). +## Train +🚧 **Work in progress** + +In case you want to train your own model, have a look at [`sbb_pixelwise_segmentation`](https://github.com/qurator-spk/sbb_pixelwise_segmentation). 
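For a concrete bootstrap, the `models_eynollah` Makefile target shown earlier now fetches the v0.3.1 release archive; a hand-run equivalent (URL copied from that target):

```sh
# Equivalent to `make models`: download and unpack the pre-trained models
wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz'
tar xf models_eynollah.tar.gz   # unpacks into ./models_eynollah
```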
## Usage The command-line interface can be called like this: ```sh eynollah \ - -i <single image file> \ + -i <single image file> | -di <directory containing image files> \ -o <output directory> \ - -m <path to directory containing model files> \ + -m <directory containing model files> \ [OPTIONS] ``` The following options can be used to further configure the processing: -| option | description | -|----------|:-------------| -| `-fl` | full layout analysis including all steps and segmentation classes | -| `-light` | lighter and faster but simpler method for main region detection and deskewing | -| `-tab` | apply table detection | -| `-ae` | apply enhancement (the resulting image is saved to the output directory) | -| `-as` | apply scaling | -| `-cl` | apply contour detection for curved text lines instead of bounding boxes | -| `-ib` | apply binarization (the resulting image is saved to the output directory) | -| `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) | -| `-ho` | ignore headers for reading order dectection | -| `-di <directory>` | process all images in a directory in batch mode | -| `-si <directory>` | save image regions detected to this directory | -| `-sd <directory>` | save deskewed image to this directory | -| `-sl <directory>` | save layout prediction as plot to this directory | -| `-sp <directory>` | save cropped page image to this directory | -| `-sa <directory>` | save all (plot, enhanced/binary image, layout) to this directory | - -If no option is set, the tool will perform layout detection of main regions (background, text, images, separators and marginals). -The tool produces better quality output when RGB images are used as input than greyscale or binarized images. +| option | description | +|-------------------|:-------------------------------------------------------------------------------| +| `-fl` | full layout analysis including all steps and segmentation classes | +| `-light` | lighter and faster but simpler method for main region detection and deskewing | +| `-tab` | apply table detection | +| `-ae` | apply enhancement (the resulting image is saved to the output directory) | +| `-as` | apply scaling | +| `-cl` | apply contour detection for curved text lines instead of bounding boxes | +| `-ib` | apply binarization (the resulting image is saved to the output directory) | +| `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) | +| `-eoi` | extract only images to output directory (other processing will not be done) | +| `-ho` | ignore headers for reading order detection | +| `-si <directory>` | save image regions detected to this directory | +| `-sd <directory>` | save deskewed image to this directory | +| `-sl <directory>` | save layout prediction as plot to this directory | +| `-sp <directory>` | save cropped page image to this directory | +| `-sa <directory>` | save all (plot, enhanced/binary image, layout) to this directory | + +If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals). +The best output quality is produced when RGB images are used as input rather than greyscale or binarized images. #### Use as OCR-D processor +🚧 **Work in progress** Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor. @@ -95,11 +101,14 @@ ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps +#### Additional documentation +Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki). 
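As a worked example of the options above (file and directory names are placeholders; note that the check added in `cli.py` further down requires combining `-light` with the light textline option `-tll`):

```sh
# Single RGB image, light model variant, plus table detection
eynollah -i page_0001.tif -o out/ -m models_eynollah/ -light -tll -tab

# Batch mode over a directory, additionally saving layout plots (-ep is required with -sl)
eynollah -di pages/ -o out/ -m models_eynollah/ -ep -sl plots/
```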
+ ## How to cite If you find this tool useful in your work, please consider citing our paper: ```bibtex -@inproceedings{rezanezhad2023eynollah, +@inproceedings{hip23rezanezhad, title = {Document Layout Analysis with Deep Learning and Heuristics}, author = {Rezanezhad, Vahid and Baierer, Konstantin and Gerber, Mike and Labusch, Kai and Neudecker, Clemens}, booktitle = {Proceedings of the 7th International Workshop on Historical Document Imaging and Processing {HIP} 2023, diff --git a/ocrd-tool.json b/ocrd-tool.json index 5c48493..711a192 120000 --- a/ocrd-tool.json +++ b/ocrd-tool.json @@ -1 +1 @@ -qurator/eynollah/ocrd-tool.json \ No newline at end of file +src/eynollah/ocrd-tool.json \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 67544bb..67a420d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,34 +1,43 @@ [build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" +requires = ["setuptools>=61.0", "wheel", "setuptools-ocrd"] [project] name = "eynollah" -version = "0.1.0" - - - - -dependencies = [ - "ocrd >= 2.23.3", - "tensorflow == 2.12.1", - "scikit-learn >= 0.23.2", - "imutils >= 0.5.3", - "numpy < 1.24.0", - "matplotlib", - "torch == 2.0.1", - "transformers == 4.30.2", - "numba == 0.58.1", +authors = [ + {name = "Vahid Rezanezhad"}, + {name = "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"}, +] +description = "Document Layout Analysis" +readme = "README.md" +license.file = "LICENSE" +requires-python = ">=3.8" +keywords = ["document layout analysis", "image segmentation"] + +dynamic = ["dependencies", "version"] + +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: Image Processing", ] [project.scripts] -eynollah = "qurator.eynollah.cli:main" -ocrd-eynollah-segment="qurator.eynollah.ocrd_cli:main" +eynollah = "eynollah.cli:main" +ocrd-eynollah-segment = "eynollah.ocrd_cli:main" + +[project.urls] +Homepage = "https://github.com/qurator-spk/eynollah" +Repository = "https://github.com/qurator-spk/eynollah.git" +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} [tool.setuptools.packages.find] -where = ["."] -include = ["qurator"] +where = ["src"] [tool.setuptools.package-data] "*" = ["*.json", '*.yml', '*.xml', '*.xsd'] diff --git a/qurator/__init__.py b/qurator/__init__.py deleted file mode 100644 index 5284146..0000000 --- a/qurator/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__import__("pkg_resources").declare_namespace(__name__) diff --git a/qurator/eynollah/__init__.py b/qurator/eynollah/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/qurator/eynollah/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f01d319 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +# ocrd includes opencv, numpy, shapely, click +ocrd >= 2.23.3 +numpy <1.24.0 +scikit-learn >= 0.23.2 +tensorflow == 2.12.1 +imutils >= 0.5.3 +matplotlib +setuptools >= 50 diff --git a/qurator/.gitkeep b/src/eynollah/__init__.py similarity index 100% rename from qurator/.gitkeep rename to src/eynollah/__init__.py diff --git a/qurator/eynollah/cli.py b/src/eynollah/cli.py similarity index 92% rename from qurator/eynollah/cli.py rename to src/eynollah/cli.py index 0daf0c9..9d77229 100644 --- 
a/qurator/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -1,8 +1,8 @@ import sys import click from ocrd_utils import initLogging, setOverrideLogLevel -from qurator.eynollah.eynollah import Eynollah -from qurator.eynollah.sbb_binarize import SbbBinarizer +from eynollah.eynollah import Eynollah +from eynollah.sbb_binarize import SbbBinarizer @click.group() def main(): @@ -146,6 +146,12 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) is_flag=True, help="If set, will plot intermediary files and images", ) +@click.option( + "--extract_only_images/--disable-extracting_only_images", + "-eoi/-noeoi", + is_flag=True, + help="If a directory is given, only images in the documents will be cropped and saved there, and no other processing will be done", +) @click.option( "--allow-enhancement/--no-allow-enhancement", "-ae/-noae", @@ -262,6 +268,8 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s sys.exit(1) if light_version and not textline_light: print('Error: You used -light without -tll. Light version need light textline to be enabled.') + if extract_only_images and (allow_enhancement or allow_scaling or light_version or curved_line or textline_light or full_layout or tables or right2left or headers_off): + print('Error: You used -eoi which cannot be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae or curved_line -cl or textline_light -tll or full_layout -fl or tables -tab or right2left -r2l or headers_off -ho') sys.exit(1) eynollah = Eynollah( image_filename=image, @@ -269,6 +277,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s dir_in=dir_in, dir_models=model, dir_of_cropped_images=save_images, + extract_only_images=extract_only_images, dir_of_layout=save_layout, dir_of_deskewed=save_deskewed, dir_of_all=save_all, diff --git a/qurator/eynollah/eynollah.py b/src/eynollah/eynollah.py similarity index 97% rename from qurator/eynollah/eynollah.py rename to src/eynollah/eynollah.py index 9095c15..70d8012 100644 --- a/qurator/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -158,6 +158,7 @@ class Eynollah: dir_out=None, dir_in=None, dir_of_cropped_images=None, + extract_only_images=False, dir_of_layout=None, dir_of_deskewed=None, dir_of_all=None, @@ -211,6 +212,8 @@ class Eynollah: self.input_binary = input_binary self.allow_scaling = allow_scaling self.headers_off = headers_off + self.light_version = light_version + self.extract_only_images = extract_only_images self.ignore_page_extraction = ignore_page_extraction self.skip_layout_and_reading_order = skip_layout_and_reading_order self.ocr = do_ocr @@ -254,6 +257,7 @@ class Eynollah: self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" if self.textline_light:
self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: @@ -288,7 +292,23 @@ class Eynollah: self.ls_imgs = os.listdir(self.dir_in) - if dir_in and not light_version: + if dir_in and self.extract_only_images: + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True + session = tf.compat.v1.Session(config=config) + set_session(session) + + self.model_page = self.our_load_model(self.model_page_dir) + self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) + self.model_bin = self.our_load_model(self.model_dir_of_binarization) + #self.model_textline = self.our_load_model(self.model_textline_dir) + self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction) + #self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) + #self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + + self.ls_imgs = os.listdir(self.dir_in) + + if dir_in and not (light_version or self.extract_only_images): config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True session = tf.compat.v1.Session(config=config) @@ -534,6 +554,27 @@ class Eynollah: return img_new, num_column_is_classified + def calculate_width_height_by_columns_extract_only_images(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns_extract_only_images") + if num_col == 1: + img_w_new = 700 + elif num_col == 2: + img_w_new = 900 + elif num_col == 3: + img_w_new = 1500 + elif num_col == 4: + img_w_new = 1800 + elif num_col == 5: + img_w_new = 2200 + elif num_col == 6: + img_w_new = 2500 + img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) + + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified + def resize_image_with_column_classifier(self, is_image_enhanced, img_bin): self.logger.debug("enter resize_image_with_column_classifier") if self.input_binary: @@ -690,25 +731,30 @@ class Eynollah: self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) - if dpi < DPI_THRESHOLD: - if light_version and num_col in (1,2): - img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) - else: - img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) - if light_version: - image_res = np.copy(img_new) - else: - image_res = self.predict_enhancement(img_new) - is_image_enhanced = True - else: - if light_version and num_col in (1,2): - img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) - image_res = np.copy(img_new) + if not self.extract_only_images: + if dpi < DPI_THRESHOLD: + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + else: + img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) + if light_version: + image_res = np.copy(img_new) + else: + image_res = self.predict_enhancement(img_new) is_image_enhanced = True else: - num_column_is_classified = True - image_res = np.copy(img)
- is_image_enhanced = False + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + image_res = np.copy(img_new) + is_image_enhanced = True + else: + num_column_is_classified = True + image_res = np.copy(img) + is_image_enhanced = False + else: + num_column_is_classified = True + image_res = np.copy(img) + is_image_enhanced = False self.logger.debug("exit resize_and_enhance_image_with_column_classifier") return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin @@ -1190,106 +1236,6 @@ class Eynollah: img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] batch_indexer = batch_indexer + 1 - - #img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - #verbose=0) - #seg = np.argmax(label_p_pred, axis=3)[0] - - - ######seg_not_base = label_p_pred[0,:,:,4] - ########seg2 = -label_p_pred[0,:,:,2] - - - ######seg_not_base[seg_not_base>0.03] =1 - ######seg_not_base[seg_not_base<1] =0 - - - - ######seg_test = label_p_pred[0,:,:,1] - ########seg2 = -label_p_pred[0,:,:,2] - - - ######seg_test[seg_test>0.75] =1 - ######seg_test[seg_test<1] =0 - - - ######seg_line = label_p_pred[0,:,:,3] - ########seg2 = -label_p_pred[0,:,:,2] - - - ######seg_line[seg_line>0.1] =1 - ######seg_line[seg_line<1] =0 - - - ######seg_background = label_p_pred[0,:,:,0] - ########seg2 = -label_p_pred[0,:,:,2] - - - ######seg_background[seg_background>0.25] =1 - ######seg_background[seg_background<1] =0 - ##seg = seg+seg2 - #seg = label_p_pred[0,:,:,2] - #seg[seg>0.4] =1 - #seg[seg<1] =0 - - ##plt.imshow(seg_test) - ##plt.show() - - ##plt.imshow(seg_background) - ##plt.show() - #seg[seg==1]=0 - #seg[seg_test==1]=1 - ######seg[seg_not_base==1]=4 - ######seg[seg_background==1]=0 - ######seg[(seg_line==1) & (seg==0)]=3 - #seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - - #if i == 0 and j == 0: - #seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - #elif i == nxf - 1 and j == nyf - 1: - #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color - #elif i == 0 and j == nyf - 1: - #seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color - #elif i == nxf - 1 and j == 0: - #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - #prediction_true[index_y_d + 0 : index_y_u - 
margin, index_x_d + margin : index_x_u - 0, :] = seg_color - #elif i == 0 and j != 0 and j != nyf - 1: - #seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - #elif i == nxf - 1 and j != 0 and j != nyf - 1: - #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - #elif i != 0 and i != nxf - 1 and j == 0: - #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - #elif i != 0 and i != nxf - 1 and j == nyf - 1: - #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color - #else: - #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color if batch_indexer == n_batch_inference: @@ -1302,18 +1248,11 @@ class Eynollah: seg_art = label_p_pred[:,:,:,4] seg_art[seg_art<0.2] =0 seg_art[seg_art>0] =1 - ###seg[seg_art==1]=4 - ##seg_not_base = label_p_pred[:,:,:,4] - ##seg_not_base[seg_not_base>0.03] =1 - ##seg_not_base[seg_not_base<1] =0 seg_line = label_p_pred[:,:,:,3] seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 - ##seg_background = label_p_pred[:,:,:,0] - ##seg_background[seg_background>0.25] =1 - ##seg_background[seg_background<1] =0 seg[seg_art==1]=4 ##seg[seg_background==1]=0 @@ -1384,20 +1323,15 @@ class Eynollah: seg = np.argmax(label_p_pred, axis=3) if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 + seg_art = label_p_pred[:,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 seg_line = label_p_pred[:,:,:,3] seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 - - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 + seg[seg_art==1]=4 seg[(seg_line==1) & (seg==0)]=3 if thresholding_for_artificial_class_in_light_version: @@ -2224,6 +2158,119 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) + + def 
get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier): + self.logger.debug("enter get_regions_extract_images_only") + erosion_hurts = False + img_org = np.copy(img) + img_height_h = img_org.shape[0] + img_width_h = img_org.shape[1] + + if num_col_classifier == 1: + img_w_new = 700 + elif num_col_classifier == 2: + img_w_new = 900 + elif num_col_classifier == 3: + img_w_new = 1500 + elif num_col_classifier == 4: + img_w_new = 1800 + elif num_col_classifier == 5: + img_w_new = 2200 + elif num_col_classifier == 6: + img_w_new = 2500 + img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) + + img_resized = resize_image(img,img_h_new, img_w_new ) + + + + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region) + else: + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region) + + prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) + + image_page, page_coord, cont_page = self.extract_page() + + + prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + + prediction_regions_org=prediction_regions_org[:,:,0] + + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + + mask_images_only=(prediction_regions_org[:,:] ==2)*1 + + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) + + text_regions_p_true = np.zeros(prediction_regions_org.shape) + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) + + text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) + + + + text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0 + text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0 + + ##polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001) + polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001) + + image_boundary_of_doc = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1])) + + ###image_boundary_of_doc[:6, :] = 1 + ###image_boundary_of_doc[text_regions_p_true.shape[0]-6:text_regions_p_true.shape[0], :] = 1 + + ###image_boundary_of_doc[:, :6] = 1 + ###image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1 + + + polygons_of_images_fin = [] + for ploy_img_ind in polygons_of_images: + """ + test_poly_image = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1])) + test_poly_image = cv2.fillPoly(test_poly_image, pts = [ploy_img_ind], color=(1,1,1)) + + test_poly_image = test_poly_image[:,:] + image_boundary_of_doc[:,:] + test_poly_image_intersected_area = ( test_poly_image[:,:]==2 )*1 + + test_poly_image_intersected_area = test_poly_image_intersected_area.sum() + + if 
test_poly_image_intersected_area==0: + ##polygons_of_images_fin.append(ploy_img_ind) + + x, y, w, h = cv2.boundingRect(ploy_img_ind) + box = [x, y, w, h] + _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) + + polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) + """ + x, y, w, h = cv2.boundingRect(ploy_img_ind) + if h < 150 or w < 150: + pass + else: + box = [x, y, w, h] + _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) + + polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) + + return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() @@ -3179,11 +3226,13 @@ class Eynollah: prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) + def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') #print(erosion_hurts, 'erosion_hurts') t_in_gr = time.time() + img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) @@ -3667,12 +3716,12 @@ class Eynollah: img_poly[text_regions_p[:,:]==2] = 2 img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - - + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) height1 =672#448 width1 = 448#224 + t0_tot = time.time() height2 =672#448 width2= 448#224 @@ -3684,7 +3733,6 @@ class Eynollah: if contours_only_text_parent_h: _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - for j in range(len(cy_main)): img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 diff --git a/qurator/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json similarity index 93% rename from qurator/eynollah/ocrd-tool.json rename to src/eynollah/ocrd-tool.json index 8a2cb95..b840005 100644 --- a/qurator/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -1,5 +1,5 @@ { - "version": "0.3.0", + "version": "0.3.1", "git_url": "https://github.com/qurator-spk/eynollah", "tools": { "ocrd-eynollah-segment": { @@ -52,10 +52,10 @@ }, "resources": [ { - "description": "models for eynollah (TensorFlow format)", - "url": "https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz", + "description": "models for eynollah (TensorFlow SavedModel format)", + "url": 
"https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz", "name": "default", - "size": 1761991295, + "size": 1894627041, "type": "archive", "path_in_archive": "models_eynollah" } diff --git a/qurator/eynollah/ocrd_cli.py b/src/eynollah/ocrd_cli.py similarity index 100% rename from qurator/eynollah/ocrd_cli.py rename to src/eynollah/ocrd_cli.py diff --git a/qurator/eynollah/plot.py b/src/eynollah/plot.py similarity index 100% rename from qurator/eynollah/plot.py rename to src/eynollah/plot.py diff --git a/qurator/eynollah/processor.py b/src/eynollah/processor.py similarity index 97% rename from qurator/eynollah/processor.py rename to src/eynollah/processor.py index ccec456..1bd190e 100644 --- a/qurator/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -42,7 +42,7 @@ class EynollahProcessor(Processor): page = pcgts.get_Page() # XXX loses DPI information # page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') - image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename + image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(local_filename=page.imageFilename))).local_filename eynollah_kwargs = { 'dir_models': self.resolve_resource(self.parameter['models']), 'allow_enhancement': False, diff --git a/qurator/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py similarity index 100% rename from qurator/eynollah/sbb_binarize.py rename to src/eynollah/sbb_binarize.py diff --git a/qurator/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py similarity index 100% rename from qurator/eynollah/utils/__init__.py rename to src/eynollah/utils/__init__.py diff --git a/qurator/eynollah/utils/contour.py b/src/eynollah/utils/contour.py similarity index 100% rename from qurator/eynollah/utils/contour.py rename to src/eynollah/utils/contour.py diff --git a/qurator/eynollah/utils/counter.py b/src/eynollah/utils/counter.py similarity index 100% rename from qurator/eynollah/utils/counter.py rename to src/eynollah/utils/counter.py diff --git a/qurator/eynollah/utils/drop_capitals.py b/src/eynollah/utils/drop_capitals.py similarity index 100% rename from qurator/eynollah/utils/drop_capitals.py rename to src/eynollah/utils/drop_capitals.py diff --git a/qurator/eynollah/utils/is_nan.py b/src/eynollah/utils/is_nan.py similarity index 100% rename from qurator/eynollah/utils/is_nan.py rename to src/eynollah/utils/is_nan.py diff --git a/qurator/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py similarity index 100% rename from qurator/eynollah/utils/marginals.py rename to src/eynollah/utils/marginals.py diff --git a/qurator/eynollah/utils/pil_cv2.py b/src/eynollah/utils/pil_cv2.py similarity index 100% rename from qurator/eynollah/utils/pil_cv2.py rename to src/eynollah/utils/pil_cv2.py diff --git a/qurator/eynollah/utils/resize.py b/src/eynollah/utils/resize.py similarity index 100% rename from qurator/eynollah/utils/resize.py rename to src/eynollah/utils/resize.py diff --git a/qurator/eynollah/utils/rotate.py b/src/eynollah/utils/rotate.py similarity index 100% rename from qurator/eynollah/utils/rotate.py rename to src/eynollah/utils/rotate.py diff --git a/qurator/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py similarity index 100% rename from qurator/eynollah/utils/separate_lines.py rename to src/eynollah/utils/separate_lines.py diff --git a/qurator/eynollah/utils/xml.py b/src/eynollah/utils/xml.py 
similarity index 100% rename from qurator/eynollah/utils/xml.py rename to src/eynollah/utils/xml.py diff --git a/qurator/eynollah/writer.py b/src/eynollah/writer.py similarity index 96% rename from qurator/eynollah/writer.py rename to src/eynollah/writer.py index 8eb1027..96441c6 100644 --- a/qurator/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -202,10 +202,18 @@ class EynollahXmlWriter(): page.add_ImageRegion(img_region) points_co = '' for lmm in range(len(found_polygons_text_region_img[mm])): - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) - points_co += ' ' + try: + points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) + points_co += ',' + points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) + points_co += ' ' + except: # fall back for polygons given as plain [x, y] point lists rather than OpenCV (N, 1, 2) contour arrays + + points_co += str(int((found_polygons_text_region_img[mm][lmm][0] + page_coord[2])/ self.scale_x )) + points_co += ',' + points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0])/ self.scale_y )) + points_co += ' ' + img_region.get_Coords().set_points(points_co[:-1]) for mm in range(len(polygons_lines_to_be_written_in_xml)): diff --git a/tests/test_counter.py b/tests/test_counter.py index 8ef0756..42bf074 100644 --- a/tests/test_counter.py +++ b/tests/test_counter.py @@ -1,5 +1,5 @@ from tests.base import main -from qurator.eynollah.utils.counter import EynollahIdCounter +from eynollah.utils.counter import EynollahIdCounter def test_counter_string(): c = EynollahIdCounter() diff --git a/tests/test_dpi.py b/tests/test_dpi.py index 510ffc5..3376bf4 100644 --- a/tests/test_dpi.py +++ b/tests/test_dpi.py @@ -1,6 +1,6 @@ import cv2 from pathlib import Path -from qurator.eynollah.utils.pil_cv2 import check_dpi +from eynollah.utils.pil_cv2 import check_dpi from tests.base import main def test_dpi(): diff --git a/tests/test_run.py b/tests/test_run.py index b1137e7..2596dad 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -2,7 +2,7 @@ from os import environ from pathlib import Path from ocrd_utils import pushd_popd from tests.base import CapturingTestCase as TestCase, main -from qurator.eynollah.cli import main as eynollah_cli +from eynollah.cli import main as eynollah_cli testdir = Path(__file__).parent.resolve() diff --git a/tests/test_smoke.py b/tests/test_smoke.py index d069479..252213f 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1,7 +1,6 @@ def test_utils_import(): - import qurator.eynollah.utils - import qurator.eynollah.utils.contour - import qurator.eynollah.utils.drop_capitals - import qurator.eynollah.utils.drop_capitals - import qurator.eynollah.utils.is_nan - import qurator.eynollah.utils.rotate + import eynollah.utils + import eynollah.utils.contour + import eynollah.utils.drop_capitals + import eynollah.utils.is_nan + import eynollah.utils.rotate diff --git a/tests/test_xml.py b/tests/test_xml.py index 8422fd1..09a6ddf 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -1,5 +1,5 @@ from pytest import main -from qurator.eynollah.utils.xml import create_page_xml +from eynollah.utils.xml import create_page_xml from ocrd_models.ocrd_page import to_xml PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'
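Finally, a quick sanity check of the `qurator.eynollah` → `eynollah` package move after `make install` — a sketch; the `--help` calls merely verify that the console-script entry points declared in `pyproject.toml` resolve:

```sh
# Imports must now resolve without the qurator namespace package
python -c "import eynollah.utils.contour, eynollah.utils.xml, eynollah.utils.counter"

# Entry points from pyproject.toml: eynollah.cli:main and eynollah.ocrd_cli:main
eynollah --help
ocrd-eynollah-segment --help
```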