From f09b7c1bef9e91f244232eb88fab48f59624f822 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 20:29:10 +0100 Subject: [PATCH 01/26] use tf1 compatibility for keras backend --- qurator/eynollah/eynollah.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 49422fa..c162af7 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -29,7 +29,8 @@ warnings.filterwarnings("ignore") from scipy.signal import find_peaks import matplotlib.pyplot as plt from scipy.ndimage import gaussian_filter1d -from tensorflow.python.keras.backend import set_session +# use tf1 compatibility for keras backend +from tensorflow.compat.v1.keras.backend import set_session from tensorflow.keras import layers from .utils.contour import ( From b3fa68439559479f2786c12482fd9270af9b4075 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 20:30:40 +0100 Subject: [PATCH 02/26] pin tf2 version to 2.12.1 until we fix keras compatibility --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 530dac2..f01d319 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ ocrd >= 2.23.3 numpy <1.24.0 scikit-learn >= 0.23.2 -tensorflow >=2.12.0 +tensorflow == 2.12.1 imutils >= 0.5.3 matplotlib setuptools >= 50 From 533736a3e355c37fbe7bea8c993c502992390f85 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Wed, 20 Mar 2024 00:28:22 +0100 Subject: [PATCH 03/26] update supported Python+Tensorflow version combinations --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index b095edb..2dc90ec 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,7 @@ * [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface ## 
Installation -Python versions `3.8-3.11` with Tensorflow versions >=`2.12` on Linux are currently supported. Unfortunately we can not currently support Windows or MacOS. -Windows users may be able to successfully run the tool through [WSL](https://learn.microsoft.com/en-us/windows/wsl/). +Python versions `3.8-3.11` with Tensorflow versions `2.12-2.15` on Linux are currently supported. For (limited) GPU support the CUDA toolkit needs to be installed. From ba64282118cd4891067a69825d7e03614c4eada7 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Wed, 3 Apr 2024 19:58:24 +0200 Subject: [PATCH 04/26] Update README.md --- README.md | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2dc90ec..302880a 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # Eynollah -> Document Layout Analysis (segmentation) using pre-trained models and heuristics +> Document Layout Analysis with Deep Learning and Heuristics [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/) [![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=shield)](https://circleci.com/gh/qurator-spk/eynollah) [![GH Actions Test](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml/badge.svg)](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml) [![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://opensource.org/license/apache-2-0/) +[![DOI](https://img.shields.io/badge/DOI-10.1145%2F3604951.3605513-red)](https://doi.org/10.1145/3604951.3605513) ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) @@ -14,16 +15,19 @@ * Support for various image optimization operations: * cropping (border detection), binarization, deskewing, dewarping, scaling, enhancing, resizing * Text line segmentation to bounding 
boxes or polygons (contours) including for curved lines and vertical text -* Detection of reading order +* Detection of reading order (left-to-right or right-to-left) * Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) * [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface +:warning: Eynollah development is currently focused on achieving high quality results for a wide variety of historical documents. +Processing can be very slow, with a lot of potential to improve. We aim to work on this too, but contributions are always welcome. + ## Installation -Python versions `3.8-3.11` with Tensorflow versions `2.12-2.15` on Linux are currently supported. +Python `3.8-3.11` with Tensorflow `2.12-2.15` on Linux are currently supported. For (limited) GPU support the CUDA toolkit needs to be installed. -You can either install via +You can either install from PyPI ``` pip install eynollah @@ -39,18 +43,21 @@ cd eynollah; pip install -e . Alternatively, you can run `make install` or `make install-dev` for editable installation. ## Models -Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/). +Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/) or [huggingface](https://huggingface.co/SBB). -In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation). +## Train +🚧 **Work in progress** + +In case you want to train your own model, have a look at [`sbb_pixelwise_segmentation`](https://github.com/qurator-spk/sbb_pixelwise_segmentation). 
## Usage The command-line interface can be called like this: ```sh eynollah \ - -i \ + -i | -di \ -o \ - -m \ + -m \ [OPTIONS] ``` @@ -67,7 +74,6 @@ The following options can be used to further configure the processing: | `-ib` | apply binarization (the resulting image is saved to the output directory) | | `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) | | `-ho` | ignore headers for reading order dectection | -| `-di ` | process all images in a directory in batch mode | | `-si ` | save image regions detected to this directory | | `-sd ` | save deskewed image to this directory | | `-sl ` | save layout prediction as plot to this directory | @@ -78,6 +84,7 @@ If no option is set, the tool will perform layout detection of main regions (bac The tool produces better quality output when RGB images are used as input than greyscale or binarized images. #### Use as OCR-D processor +🚧 **Work in progress** Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor. @@ -95,11 +102,14 @@ ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models uses the original (RGB) image despite any binarization that may have occured in previous OCR-D processing steps +#### Additional documentation +Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki). 
+ ## How to cite If you find this tool useful in your work, please consider citing our paper: ```bibtex -@inproceedings{rezanezhad2023eynollah, +@inproceedings{hip23rezanezhad, title = {Document Layout Analysis with Deep Learning and Heuristics}, author = {Rezanezhad, Vahid and Baierer, Konstantin and Gerber, Mike and Labusch, Kai and Neudecker, Clemens}, booktitle = {Proceedings of the 7th International Workshop on Historical Document Imaging and Processing {HIP} 2023, From 899bb9f00c3b14306eb96c2a4955a0d599cc175a Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Wed, 10 Apr 2024 15:27:29 +0200 Subject: [PATCH 05/26] update GitHub actions --- .github/workflows/test-eynollah.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 30c9729..5a1acf4 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -14,8 +14,8 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v2 - - uses: actions/cache@v2 + - uses: actions/checkout@v4 + - uses: actions/cache@v4 id: model_cache with: path: models_eynollah @@ -24,7 +24,7 @@ jobs: if: steps.model_cache.outputs.cache-hit != 'true' run: make models - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies From f88ee99f3c8aea2772abdfef6b8cbc919682a794 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Thu, 23 May 2024 21:17:38 +0200 Subject: [PATCH 06/26] non-legacy namespace package --- qurator/__init__.py | 1 - qurator/eynollah/__init__.py | 1 - setup.py | 1 - 3 files changed, 3 deletions(-) delete mode 100644 qurator/__init__.py diff --git a/qurator/__init__.py b/qurator/__init__.py deleted file mode 100644 index 5284146..0000000 
--- a/qurator/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__import__("pkg_resources").declare_namespace(__name__) diff --git a/qurator/eynollah/__init__.py b/qurator/eynollah/__init__.py index 8b13789..e69de29 100644 --- a/qurator/eynollah/__init__.py +++ b/qurator/eynollah/__init__.py @@ -1 +0,0 @@ - diff --git a/setup.py b/setup.py index 9abf158..c78ee3f 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,6 @@ setup( author='Vahid Rezanezhad', url='https://github.com/qurator-spk/eynollah', license='Apache License 2.0', - namespace_packages=['qurator'], packages=find_packages(exclude=['tests']), install_requires=install_requires, package_data={ From 45bd76f5e81c305446750360c7ac62e38f454bac Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 14:27:56 +0000 Subject: [PATCH 07/26] fix namespace pkg setup --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c78ee3f..af8a321 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup, find_packages +from setuptools import setup, find_namespace_packages from json import load install_requires = open('requirements.txt').read().split('\n') @@ -13,7 +13,7 @@ setup( author='Vahid Rezanezhad', url='https://github.com/qurator-spk/eynollah', license='Apache License 2.0', - packages=find_packages(exclude=['tests']), + packages=find_namespace_packages(include=['qurator']), install_requires=install_requires, package_data={ '': ['*.json'] From ad133e34251b0164cca059542240690762dfb7db Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 31 Jul 2024 19:49:43 +0200 Subject: [PATCH 08/26] Update model download url --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 525e6c3..439b534 100644 --- a/Makefile +++ b/Makefile @@ -24,12 +24,13 @@ models: models_eynollah models_eynollah: models_eynollah.tar.gz # tar xf models_eynollah_renamed.tar.gz --transform 
's/models_eynollah_renamed/models_eynollah/' # tar xf models_eynollah_renamed.tar.gz - tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' + tar xf models_eynollah_renamed_savedmodel.tar.gz --transform 's/models_eynollah_renamed_savedmodel/models_eynollah/' models_eynollah.tar.gz: # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' - wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz' # Install with pip install: From 3cfa447e84027867798a4c358244ed9ce0095ae9 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 31 Jul 2024 20:01:36 +0200 Subject: [PATCH 09/26] remove CircleCI --- .circleci/config.yml | 51 -------------------------------------------- README.md | 1 - 2 files changed, 52 deletions(-) delete mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index d2b7057..0000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,51 +0,0 @@ -version: 2 - -jobs: - - build-python37: - machine: - - image: ubuntu-2004:2023.02.1 - - steps: - - checkout - - restore_cache: - keys: - - model-cache - - run: make models - - save_cache: - key: model-cache - paths: - models_eynollah.tar.gz - models_eynollah - - run: - name: "Set Python Version" - command: pyenv install -s 3.7.16 && pyenv global 3.7.16 - - run: make install - - run: make smoke-test - - build-python38: - machine: - - image: ubuntu-2004:2023.02.1 - steps: - - checkout - - restore_cache: - keys: - - model-cache - - run: make models - - save_cache: - key: model-cache - paths: - models_eynollah.tar.gz - models_eynollah - - run: - name: "Set Python Version" - command: pyenv install -s 3.8.16 && pyenv global 3.8.16 - - 
run: make install - - run: make smoke-test - -workflows: - version: 2 - build: - jobs: - # - build-python37 - - build-python38 diff --git a/README.md b/README.md index 302880a..3b4f784 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ > Document Layout Analysis with Deep Learning and Heuristics [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/) -[![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=shield)](https://circleci.com/gh/qurator-spk/eynollah) [![GH Actions Test](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml/badge.svg)](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml) [![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://opensource.org/license/apache-2-0/) [![DOI](https://img.shields.io/badge/DOI-10.1145%2F3604951.3605513-red)](https://doi.org/10.1145/3604951.3605513) From 40f5408b1e576eb83983f28d4fcd68c298d79899 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 31 Jul 2024 20:02:56 +0200 Subject: [PATCH 10/26] improve huggingface url --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3b4f784..f7a0a77 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ cd eynollah; pip install -e . Alternatively, you can run `make install` or `make install-dev` for editable installation. ## Models -Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/) or [huggingface](https://huggingface.co/SBB). +Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/) or [huggingface](https://huggingface.co/SBB?search_models=eynollah). 
## Train 🚧 **Work in progress** From 38698c66097e7f3793eb4143a0519d4b36aa053f Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 31 Jul 2024 21:16:02 +0200 Subject: [PATCH 11/26] Update README.md --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f7a0a77..b47eae3 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,7 @@ * Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) * [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface -:warning: Eynollah development is currently focused on achieving high quality results for a wide variety of historical documents. -Processing can be very slow, with a lot of potential to improve. We aim to work on this too, but contributions are always welcome. +:warning: Eynollah development is currently focused on achieving the best possible quality of results for a wide variety of historical documents and therefore processing can be very slow. We aim to improve this, but contributions are always welcome. ## Installation Python `3.8-3.11` with Tensorflow `2.12-2.15` on Linux are currently supported. @@ -79,8 +78,8 @@ The following options can be used to further configure the processing: | `-sp ` | save cropped page image to this directory | | `-sa ` | save all (plot, enhanced/binary image, layout) to this directory | -If no option is set, the tool will perform layout detection of main regions (background, text, images, separators and marginals). -The tool produces better quality output when RGB images are used as input than greyscale or binarized images. +If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals). +Best quality output is produced when RGB images are used as input rather than greyscale or binarized images. 
#### Use as OCR-D processor 🚧 **Work in progress** From 8862df9156b73eae0c1afb43dd7082f4115555dd Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 31 Jul 2024 22:53:36 +0200 Subject: [PATCH 12/26] format options table --- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index b47eae3..a92ad87 100644 --- a/README.md +++ b/README.md @@ -61,22 +61,22 @@ eynollah \ The following options can be used to further configure the processing: -| option | description | -|----------|:-------------| -| `-fl` | full layout analysis including all steps and segmentation classes | -| `-light` | lighter and faster but simpler method for main region detection and deskewing | -| `-tab` | apply table detection | -| `-ae` | apply enhancement (the resulting image is saved to the output directory) | -| `-as` | apply scaling | -| `-cl` | apply contour detection for curved text lines instead of bounding boxes | -| `-ib` | apply binarization (the resulting image is saved to the output directory) | -| `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) | -| `-ho` | ignore headers for reading order dectection | -| `-si ` | save image regions detected to this directory | -| `-sd ` | save deskewed image to this directory | -| `-sl ` | save layout prediction as plot to this directory | -| `-sp ` | save cropped page image to this directory | -| `-sa ` | save all (plot, enhanced/binary image, layout) to this directory | +| option | description | +|-------------------|:-------------------------------------------------------------------------------| +| `-fl` | full layout analysis including all steps and segmentation classes | +| `-light` | lighter and faster but simpler method for main region detection and deskewing | +| `-tab` | apply table detection | +| `-ae` | apply enhancement (the resulting image is saved to the output directory) | +| `-as` 
| apply scaling | +| `-cl` | apply contour detection for curved text lines instead of bounding boxes | +| `-ib` | apply binarization (the resulting image is saved to the output directory) | +| `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) | +| `-ho` | ignore headers for reading order detection | +| `-si ` | save image regions detected to this directory | +| `-sd ` | save deskewed image to this directory | +| `-sl ` | save layout prediction as plot to this directory | +| `-sp ` | save cropped page image to this directory | +| `-sa ` | save all (plot, enhanced/binary image, layout) to this directory | If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals). Best quality output is produced when RGB images are used as input rather than greyscale or binarized images. From c9f63826c05d5ddf975174a6ae28e7f7d9912fc0 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 1 Aug 2024 00:13:42 +0200 Subject: [PATCH 13/26] create draft pyproject.toml --- pyproject.toml.txt | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 pyproject.toml.txt diff --git a/pyproject.toml.txt b/pyproject.toml.txt new file mode 100644 index 0000000..43d7093 --- /dev/null +++ b/pyproject.toml.txt @@ -0,0 +1,38 @@ +[build-system] +requires = ["setuptools>=61.0", "setuptools-ocrd"] + +[project] +name = "eynollah" +version = "0.3.0" +authors = [ + {name = "Vahid Rezanezhad"}, + {name = "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"}, +] +description = "Document Layout Analysis" +readme = "README.md" +license.file = "LICENSE" +requires-python = ">=3.8" +keywords = ["document layout analysis", "image segmentation"] + +dynamic = ["dependencies"] + +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache
Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: Image Processing", +] + +[project.scripts] +eynollah = "eynollah.eynollah.cli:main" +ocrd-eynollah-segment = "eynollah.eynollah.ocrd_cli:main" + +[project.urls] +Homepage = "https://github.com/qurator-spk/eynollah" +Repository = "https://github.com/qurator-spk/eynollah.git" + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} From 7ded54a8d21b14fff3c4d048a33710910476b834 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 1 Aug 2024 00:25:31 +0200 Subject: [PATCH 14/26] rename GH action --- .github/workflows/test-eynollah.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 5a1acf4..98ddc06 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Python package +name: Test on: [push] From f0e7f75499577bea004bff5b7a3e8b5a673688a1 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 1 Aug 2024 00:30:25 +0200 Subject: [PATCH 15/26] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a92ad87..1720f7f 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ * Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) * [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface -:warning: Eynollah development is currently focused on achieving the best possible quality of results for a wide variety of historical documents and therefore processing can be 
very slow. We aim to improve this, but contributions are always welcome. +:warning: Development is currently focused on achieving the best possible quality of results for a wide variety of historical documents and therefore processing can be very slow. We aim to improve this, but contributions are welcome. ## Installation Python `3.8-3.11` with Tensorflow `2.12-2.15` on Linux are currently supported. @@ -79,7 +79,7 @@ The following options can be used to further configure the processing: | `-sa ` | save all (plot, enhanced/binary image, layout) to this directory | If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals). -Best quality output is produced when RGB images are used as input rather than greyscale or binarized images. +The best output quality is produced when RGB images are used as input rather than greyscale or binarized images. #### Use as OCR-D processor 🚧 **Work in progress** From e3edb0ec30826541817263c0a4a52419fe430ca9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 9 Aug 2024 02:23:17 +0200 Subject: [PATCH 16/26] update --- qurator/eynollah/cli.py | 8 +++++--- qurator/eynollah/eynollah.py | 12 ++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a2a2ad0..822db18 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -198,9 +198,11 @@ def main( light_version=light_version, ignore_page_extraction=ignore_page_extraction, ) - eynollah.run() - #pcgts = eynollah.run() - ##eynollah.writer.write_pagexml(pcgts) + if dir_in: + eynollah.run() + else: + pcgts = eynollah.run() + eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": main() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c162af7..7f5561c 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3091,7 +3091,8 @@ class Eynollah: pcgts = 
self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + if not self.dir_in: + return pcgts else: contours_only_text_parent_h = None if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3101,8 +3102,11 @@ class Eynollah: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts - self.writer.write_pagexml(pcgts) - #self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + + if self.dir_in: + self.writer.write_pagexml(pcgts) + #self.logger.info("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From 23ac58405c1642413aa34f493c43ed279bda4945 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:47:32 +0200 Subject: [PATCH 17/26] update pyproject.toml --- pyproject.toml.txt | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pyproject.toml.txt b/pyproject.toml.txt index 43d7093..760c040 100644 --- a/pyproject.toml.txt +++ 
b/pyproject.toml.txt @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=61.0", "setuptools-ocrd"] +requires = ["setuptools>=61.0", "wheel", "setuptools-ocrd"] [project] name = "eynollah" @@ -30,9 +30,20 @@ classifiers = [ eynollah = "eynollah.eynollah.cli:main" ocrd-eynollah-segment = "eynollah.eynollah.ocrd_cli:main" +[project.readme] +file = "README.md" +content-type = "text/markdown" + [project.urls] Homepage = "https://github.com/qurator-spk/eynollah" Repository = "https://github.com/qurator-spk/eynollah.git" [tool.setuptools.dynamic] dependencies = {file = ["requirements.txt"]} + +[tool.setuptools.packages.find] +where = ["src"] +namespaces = false + +[tool.setuptools.package-data] +"*" = ["*.json", '*.yml', '*.xml', '*.xsd'] From 28ee1e527ea96ce992ebc534401ba171179de9f9 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 14 Aug 2024 19:50:57 +0200 Subject: [PATCH 18/26] update pyproject.toml for v0.3.1 --- pyproject.toml | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8f83249 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,44 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel", "setuptools-ocrd"] + +[project] +name = "eynollah" +version = "0.3.0" +authors = [ + {name = "Vahid Rezanezhad"}, + {name = "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"}, +] +description = "Document Layout Analysis" +readme = "README.md" +license.file = "LICENSE" +requires-python = ">=3.8" +keywords = ["document layout analysis", "image segmentation"] + +dynamic = ["dependencies"] + +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: 
Scientific/Engineering :: Image Processing", +] + +[project.scripts] +eynollah = "qurator.eynollah.cli:main" +ocrd-eynollah-segment = "qurator.eynollah.ocrd_cli:main" + +[project.urls] +Homepage = "https://github.com/qurator-spk/eynollah" +Repository = "https://github.com/qurator-spk/eynollah.git" + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} + +[tool.setuptools.packages.find] +where = ["qurator"] + +[tool.setuptools.package-data] +"*" = ["*.json", '*.yml', '*.xml', '*.xsd'] From 8f769663946c0074557a039bc5c8059ec9d410fc Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 14 Aug 2024 19:51:48 +0200 Subject: [PATCH 19/26] update pyproject.toml for v0.3.1 --- pyproject.toml.txt | 49 ---------------------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 pyproject.toml.txt diff --git a/pyproject.toml.txt b/pyproject.toml.txt deleted file mode 100644 index 760c040..0000000 --- a/pyproject.toml.txt +++ /dev/null @@ -1,49 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0", "wheel", "setuptools-ocrd"] - -[project] -name = "eynollah" -version = "0.3.0" -authors = [ - {name = "Vahid Rezanezhad"}, - {name = "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"}, -] -description = "Document Layout Analysis" -readme = "README.md" -license.file = "LICENSE" -requires-python = ">=3.8" -keywords = ["document layout analysis", "image segmentation"] - -dynamic = ["dependencies"] - -classifiers = [ - "Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3 :: Only", - "Topic :: Scientific/Engineering :: Image Processing", -] - -[project.scripts] -eynollah = "eynollah.eynollah.cli:main" -ocrd-eynollah-segment = "eynollah.eynollah.ocrd_cli:main" - -[project.readme] -file = "README.md" -content-type = 
"text/markdown" - -[project.urls] -Homepage = "https://github.com/qurator-spk/eynollah" -Repository = "https://github.com/qurator-spk/eynollah.git" - -[tool.setuptools.dynamic] -dependencies = {file = ["requirements.txt"]} - -[tool.setuptools.packages.find] -where = ["src"] -namespaces = false - -[tool.setuptools.package-data] -"*" = ["*.json", '*.yml', '*.xml', '*.xsd'] From 7f99526b9dae4aff85fa01092aeb921f8c699cf5 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 15 Aug 2024 23:59:18 +0200 Subject: [PATCH 20/26] update Makefile model location --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 439b534..4b43564 100644 --- a/Makefile +++ b/Makefile @@ -24,13 +24,15 @@ models: models_eynollah models_eynollah: models_eynollah.tar.gz # tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' # tar xf models_eynollah_renamed.tar.gz - tar xf models_eynollah_renamed_savedmodel.tar.gz --transform 's/models_eynollah_renamed_savedmodel/models_eynollah/' + # tar xf models_eynollah_renamed_savedmodel.tar.gz --transform 's/models_eynollah_renamed_savedmodel/models_eynollah/' + tar xf models_eynollah.tar.gz models_eynollah.tar.gz: # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' - wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz' + # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz' + wget https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz # Install with pip install: From 84d05bd0ae93c2fa09c3e5fa40caa8660241fffa Mon Sep 17 00:00:00 2001 From: kba Date: Fri, 23 Aug 2024 14:01:20 +0200 Subject: [PATCH 21/26] s,url,local_filename, --- 
qurator/eynollah/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/eynollah/processor.py b/qurator/eynollah/processor.py index ccec456..1bd190e 100644 --- a/qurator/eynollah/processor.py +++ b/qurator/eynollah/processor.py @@ -42,7 +42,7 @@ class EynollahProcessor(Processor): page = pcgts.get_Page() # XXX loses DPI information # page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') - image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename + image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(local_filename=page.imageFilename))).local_filename eynollah_kwargs = { 'dir_models': self.resolve_resource(self.parameter['models']), 'allow_enhancement': False, From 9ae05754364ed815dd73d74d79edc00a9f65fef4 Mon Sep 17 00:00:00 2001 From: kba Date: Tue, 27 Aug 2024 14:52:01 +0200 Subject: [PATCH 22/26] :memo: changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index da2e1c0..0fd3938 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +Fixed: + + * regression in OCR-D processor, #106 + * Expected Ptrcv::UMat for argument 'contour', #110 + * Memory usage explosion with very narrow images (e.g. book spine), #67 + ## [0.3.0] - 2023-05-13 Changed: From a5c7f223d1713ac2770bafd08dd3fc6d4b8e29a3 Mon Sep 17 00:00:00 2001 From: kba Date: Tue, 27 Aug 2024 14:54:59 +0200 Subject: [PATCH 23/26] :package: v0.3.1 --- CHANGELOG.md | 4 ++++ pyproject.toml | 2 +- qurator/eynollah/ocrd-tool.json | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fd3938..cf6263d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ Versioned according to [Semantic Versioning](http://semver.org/). 
## Unreleased +## [0.3.1] - 2024-08-27 + Fixed: * regression in OCR-D processor, #106 @@ -123,6 +125,8 @@ Fixed: Initial release +[0.3.1]: ../../compare/v0.3.1...v0.3.0 +[0.3.0]: ../../compare/v0.3.0...v0.2.0 [0.2.0]: ../../compare/v0.2.0...v0.1.0 [0.1.0]: ../../compare/v0.1.0...v0.0.11 [0.0.11]: ../../compare/v0.0.11...v0.0.10 diff --git a/pyproject.toml b/pyproject.toml index 8f83249..d6f16b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0", "wheel", "setuptools-ocrd"] [project] name = "eynollah" -version = "0.3.0" +version = "0.3.1" authors = [ {name = "Vahid Rezanezhad"}, {name = "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"}, diff --git a/qurator/eynollah/ocrd-tool.json b/qurator/eynollah/ocrd-tool.json index 8a2cb95..4551168 100644 --- a/qurator/eynollah/ocrd-tool.json +++ b/qurator/eynollah/ocrd-tool.json @@ -1,5 +1,5 @@ { - "version": "0.3.0", + "version": "0.3.1", "git_url": "https://github.com/qurator-spk/eynollah", "tools": { "ocrd-eynollah-segment": { From 62314c453ce7cbe0c66061b88a0367d4163124a2 Mon Sep 17 00:00:00 2001 From: kba Date: Tue, 27 Aug 2024 15:04:57 +0200 Subject: [PATCH 24/26] fully transition to pyproject --- pyproject.toml | 3 +-- setup.py | 28 ++-------------------------- 2 files changed, 3 insertions(+), 28 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d6f16b3..8f9f175 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,6 @@ requires = ["setuptools>=61.0", "wheel", "setuptools-ocrd"] [project] name = "eynollah" -version = "0.3.1" authors = [ {name = "Vahid Rezanezhad"}, {name = "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"}, @@ -14,7 +13,7 @@ license.file = "LICENSE" requires-python = ">=3.8" keywords = ["document layout analysis", "image segmentation"] -dynamic = ["dependencies"] +dynamic = ["dependencies", "version"] classifiers = [ "Development Status :: 4 - Beta", diff --git a/setup.py b/setup.py index af8a321..6068493 100644 --- a/setup.py 
+++ b/setup.py @@ -1,27 +1,3 @@ -from setuptools import setup, find_namespace_packages -from json import load +from setuptools import setup -install_requires = open('requirements.txt').read().split('\n') -with open('ocrd-tool.json', 'r', encoding='utf-8') as f: - version = load(f)['version'] - -setup( - name='eynollah', - version=version, - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Vahid Rezanezhad', - url='https://github.com/qurator-spk/eynollah', - license='Apache License 2.0', - packages=find_namespace_packages(include=['qurator']), - install_requires=install_requires, - package_data={ - '': ['*.json'] - }, - entry_points={ - 'console_scripts': [ - 'eynollah=qurator.eynollah.cli:main', - 'ocrd-eynollah-segment=qurator.eynollah.ocrd_cli:main', - ] - }, -) +setup() From 9367f86483329f7771d2d63cb063107d258f5412 Mon Sep 17 00:00:00 2001 From: kba Date: Thu, 29 Aug 2024 17:06:39 +0200 Subject: [PATCH 25/26] remove setup.py stub completely --- setup.py | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 setup.py diff --git a/setup.py b/setup.py deleted file mode 100644 index 6068493..0000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup() From 84b844203d7a1cb27fccefd19dee2869b0abe3b2 Mon Sep 17 00:00:00 2001 From: kba Date: Thu, 29 Aug 2024 17:11:29 +0200 Subject: [PATCH 26/26] switch from qurator namespace to src-layout --- ocrd-tool.json | 2 +- pyproject.toml | 6 +++--- qurator/.gitkeep | 0 {qurator => src}/eynollah/__init__.py | 0 {qurator => src}/eynollah/cli.py | 2 +- {qurator => src}/eynollah/eynollah.py | 0 {qurator => src}/eynollah/ocrd-tool.json | 0 {qurator => src}/eynollah/ocrd_cli.py | 0 {qurator => src}/eynollah/plot.py | 0 {qurator => src}/eynollah/processor.py | 0 {qurator => src}/eynollah/utils/__init__.py | 0 {qurator => src}/eynollah/utils/contour.py | 0 {qurator => src}/eynollah/utils/counter.py | 0 {qurator => src}/eynollah/utils/drop_capitals.py 
| 0 {qurator => src}/eynollah/utils/is_nan.py | 0 {qurator => src}/eynollah/utils/marginals.py | 0 {qurator => src}/eynollah/utils/pil_cv2.py | 0 {qurator => src}/eynollah/utils/resize.py | 0 {qurator => src}/eynollah/utils/rotate.py | 0 {qurator => src}/eynollah/utils/separate_lines.py | 0 {qurator => src}/eynollah/utils/xml.py | 0 {qurator => src}/eynollah/writer.py | 0 tests/test_counter.py | 2 +- tests/test_dpi.py | 2 +- tests/test_run.py | 2 +- tests/test_smoke.py | 12 ++++++------ tests/test_xml.py | 2 +- 27 files changed, 15 insertions(+), 15 deletions(-) delete mode 100644 qurator/.gitkeep rename {qurator => src}/eynollah/__init__.py (100%) rename {qurator => src}/eynollah/cli.py (99%) rename {qurator => src}/eynollah/eynollah.py (100%) rename {qurator => src}/eynollah/ocrd-tool.json (100%) rename {qurator => src}/eynollah/ocrd_cli.py (100%) rename {qurator => src}/eynollah/plot.py (100%) rename {qurator => src}/eynollah/processor.py (100%) rename {qurator => src}/eynollah/utils/__init__.py (100%) rename {qurator => src}/eynollah/utils/contour.py (100%) rename {qurator => src}/eynollah/utils/counter.py (100%) rename {qurator => src}/eynollah/utils/drop_capitals.py (100%) rename {qurator => src}/eynollah/utils/is_nan.py (100%) rename {qurator => src}/eynollah/utils/marginals.py (100%) rename {qurator => src}/eynollah/utils/pil_cv2.py (100%) rename {qurator => src}/eynollah/utils/resize.py (100%) rename {qurator => src}/eynollah/utils/rotate.py (100%) rename {qurator => src}/eynollah/utils/separate_lines.py (100%) rename {qurator => src}/eynollah/utils/xml.py (100%) rename {qurator => src}/eynollah/writer.py (100%) diff --git a/ocrd-tool.json b/ocrd-tool.json index 5c48493..711a192 120000 --- a/ocrd-tool.json +++ b/ocrd-tool.json @@ -1 +1 @@ -qurator/eynollah/ocrd-tool.json \ No newline at end of file +src/eynollah/ocrd-tool.json \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8f9f175..67a420d 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -26,8 +26,8 @@ classifiers = [ ] [project.scripts] -eynollah = "qurator.eynollah.cli:main" -ocrd-eynollah-segment = "qurator.eynollah.ocrd_cli:main" +eynollah = "eynollah.cli:main" +ocrd-eynollah-segment = "eynollah.ocrd_cli:main" [project.urls] Homepage = "https://github.com/qurator-spk/eynollah" @@ -37,7 +37,7 @@ Repository = "https://github.com/qurator-spk/eynollah.git" dependencies = {file = ["requirements.txt"]} [tool.setuptools.packages.find] -where = ["qurator"] +where = ["src"] [tool.setuptools.package-data] "*" = ["*.json", '*.yml', '*.xml', '*.xsd'] diff --git a/qurator/.gitkeep b/qurator/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/qurator/eynollah/__init__.py b/src/eynollah/__init__.py similarity index 100% rename from qurator/eynollah/__init__.py rename to src/eynollah/__init__.py diff --git a/qurator/eynollah/cli.py b/src/eynollah/cli.py similarity index 99% rename from qurator/eynollah/cli.py rename to src/eynollah/cli.py index 822db18..d61928f 100644 --- a/qurator/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -1,7 +1,7 @@ import sys import click from ocrd_utils import initLogging, setOverrideLogLevel -from qurator.eynollah.eynollah import Eynollah +from eynollah.eynollah import Eynollah @click.command() diff --git a/qurator/eynollah/eynollah.py b/src/eynollah/eynollah.py similarity index 100% rename from qurator/eynollah/eynollah.py rename to src/eynollah/eynollah.py diff --git a/qurator/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json similarity index 100% rename from qurator/eynollah/ocrd-tool.json rename to src/eynollah/ocrd-tool.json diff --git a/qurator/eynollah/ocrd_cli.py b/src/eynollah/ocrd_cli.py similarity index 100% rename from qurator/eynollah/ocrd_cli.py rename to src/eynollah/ocrd_cli.py diff --git a/qurator/eynollah/plot.py b/src/eynollah/plot.py similarity index 100% rename from qurator/eynollah/plot.py rename to src/eynollah/plot.py diff --git a/qurator/eynollah/processor.py 
b/src/eynollah/processor.py similarity index 100% rename from qurator/eynollah/processor.py rename to src/eynollah/processor.py diff --git a/qurator/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py similarity index 100% rename from qurator/eynollah/utils/__init__.py rename to src/eynollah/utils/__init__.py diff --git a/qurator/eynollah/utils/contour.py b/src/eynollah/utils/contour.py similarity index 100% rename from qurator/eynollah/utils/contour.py rename to src/eynollah/utils/contour.py diff --git a/qurator/eynollah/utils/counter.py b/src/eynollah/utils/counter.py similarity index 100% rename from qurator/eynollah/utils/counter.py rename to src/eynollah/utils/counter.py diff --git a/qurator/eynollah/utils/drop_capitals.py b/src/eynollah/utils/drop_capitals.py similarity index 100% rename from qurator/eynollah/utils/drop_capitals.py rename to src/eynollah/utils/drop_capitals.py diff --git a/qurator/eynollah/utils/is_nan.py b/src/eynollah/utils/is_nan.py similarity index 100% rename from qurator/eynollah/utils/is_nan.py rename to src/eynollah/utils/is_nan.py diff --git a/qurator/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py similarity index 100% rename from qurator/eynollah/utils/marginals.py rename to src/eynollah/utils/marginals.py diff --git a/qurator/eynollah/utils/pil_cv2.py b/src/eynollah/utils/pil_cv2.py similarity index 100% rename from qurator/eynollah/utils/pil_cv2.py rename to src/eynollah/utils/pil_cv2.py diff --git a/qurator/eynollah/utils/resize.py b/src/eynollah/utils/resize.py similarity index 100% rename from qurator/eynollah/utils/resize.py rename to src/eynollah/utils/resize.py diff --git a/qurator/eynollah/utils/rotate.py b/src/eynollah/utils/rotate.py similarity index 100% rename from qurator/eynollah/utils/rotate.py rename to src/eynollah/utils/rotate.py diff --git a/qurator/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py similarity index 100% rename from 
qurator/eynollah/utils/separate_lines.py rename to src/eynollah/utils/separate_lines.py diff --git a/qurator/eynollah/utils/xml.py b/src/eynollah/utils/xml.py similarity index 100% rename from qurator/eynollah/utils/xml.py rename to src/eynollah/utils/xml.py diff --git a/qurator/eynollah/writer.py b/src/eynollah/writer.py similarity index 100% rename from qurator/eynollah/writer.py rename to src/eynollah/writer.py diff --git a/tests/test_counter.py b/tests/test_counter.py index 8ef0756..42bf074 100644 --- a/tests/test_counter.py +++ b/tests/test_counter.py @@ -1,5 +1,5 @@ from tests.base import main -from qurator.eynollah.utils.counter import EynollahIdCounter +from eynollah.utils.counter import EynollahIdCounter def test_counter_string(): c = EynollahIdCounter() diff --git a/tests/test_dpi.py b/tests/test_dpi.py index 510ffc5..3376bf4 100644 --- a/tests/test_dpi.py +++ b/tests/test_dpi.py @@ -1,6 +1,6 @@ import cv2 from pathlib import Path -from qurator.eynollah.utils.pil_cv2 import check_dpi +from eynollah.utils.pil_cv2 import check_dpi from tests.base import main def test_dpi(): diff --git a/tests/test_run.py b/tests/test_run.py index b1137e7..2596dad 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -2,7 +2,7 @@ from os import environ from pathlib import Path from ocrd_utils import pushd_popd from tests.base import CapturingTestCase as TestCase, main -from qurator.eynollah.cli import main as eynollah_cli +from eynollah.cli import main as eynollah_cli testdir = Path(__file__).parent.resolve() diff --git a/tests/test_smoke.py b/tests/test_smoke.py index d069479..252213f 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1,7 +1,7 @@ def test_utils_import(): - import qurator.eynollah.utils - import qurator.eynollah.utils.contour - import qurator.eynollah.utils.drop_capitals - import qurator.eynollah.utils.drop_capitals - import qurator.eynollah.utils.is_nan - import qurator.eynollah.utils.rotate + import eynollah.utils + import 
eynollah.utils.contour + import eynollah.utils.drop_capitals + import eynollah.utils.drop_capitals + import eynollah.utils.is_nan + import eynollah.utils.rotate diff --git a/tests/test_xml.py b/tests/test_xml.py index 8422fd1..09a6ddf 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -1,5 +1,5 @@ from pytest import main -from qurator.eynollah.utils.xml import create_page_xml +from eynollah.utils.xml import create_page_xml from ocrd_models.ocrd_page import to_xml PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'