mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-21 13:54:16 +02:00
Merge branch 'main' into integrate-training-from-sbb_pixelwise_segmentation
This commit is contained in:
commit
af5abb77fd
24 changed files with 2778 additions and 3122 deletions
36
.github/workflows/test-eynollah.yml
vendored
36
.github/workflows/test-eynollah.yml
vendored
|
@ -24,24 +24,39 @@ jobs:
|
|||
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
||||
df -h
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/cache@v4
|
||||
- uses: actions/cache/restore@v4
|
||||
id: seg_model_cache
|
||||
with:
|
||||
path: models_layout_v0_5_0
|
||||
key: ${{ runner.os }}-models
|
||||
- uses: actions/cache@v4
|
||||
key: seg-models
|
||||
- uses: actions/cache/restore@v4
|
||||
id: ocr_model_cache
|
||||
with:
|
||||
path: models_ocr_v0_5_0
|
||||
key: ${{ runner.os }}-models
|
||||
- uses: actions/cache@v4
|
||||
path: models_ocr_v0_5_1
|
||||
key: ocr-models
|
||||
- uses: actions/cache/restore@v4
|
||||
id: bin_model_cache
|
||||
with:
|
||||
path: default-2021-03-09
|
||||
key: ${{ runner.os }}-modelbin
|
||||
key: bin-models
|
||||
- name: Download models
|
||||
# cache-hit is a string output, so every comparison must be against the string 'true'
if: steps.seg_model_cache.outputs.cache-hit != 'true' || steps.bin_model_cache.outputs.cache-hit != 'true' || steps.ocr_model_cache.outputs.cache-hit != 'true'
|
||||
run: make models
|
||||
- uses: actions/cache/save@v4
|
||||
if: steps.seg_model_cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: models_layout_v0_5_0
|
||||
key: seg-models
|
||||
- uses: actions/cache/save@v4
|
||||
if: steps.ocr_model_cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: models_ocr_v0_5_1
|
||||
key: ocr-models
|
||||
- uses: actions/cache/save@v4
|
||||
if: steps.bin_model_cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: default-2021-03-09
|
||||
key: bin-models
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
|
@ -50,7 +65,12 @@ jobs:
|
|||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
make install-dev EXTRAS=OCR,plotting
|
||||
make deps-test
|
||||
make deps-test EXTRAS=OCR,plotting
|
||||
ls -l models_*
|
||||
- name: Lint with ruff
|
||||
uses: astral-sh/ruff-action@v3
|
||||
with:
|
||||
src: "./src"
|
||||
- name: Test with pytest
|
||||
run: make coverage PYTEST_ARGS="-vv --junitxml=pytest.xml"
|
||||
- name: Get coverage results
|
||||
|
|
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -2,8 +2,12 @@
|
|||
__pycache__
|
||||
sbb_newspapers_org_image/pylint.log
|
||||
models_eynollah*
|
||||
models_ocr*
|
||||
models_layout*
|
||||
default-2021-03-09
|
||||
output.html
|
||||
/build
|
||||
/dist
|
||||
*.tif
|
||||
*.sw?
|
||||
TAGS
|
||||
|
|
108
CHANGELOG.md
108
CHANGELOG.md
|
@ -5,17 +5,123 @@ Versioned according to [Semantic Versioning](http://semver.org/).
|
|||
|
||||
## Unreleased
|
||||
|
||||
Fixed:
|
||||
|
||||
* `join_polygons` always returning Polygon, not MultiPolygon, #203
|
||||
|
||||
## [0.6.0rc2] - 2025-10-14
|
||||
|
||||
Fixed:
|
||||
|
||||
* Prevent OOM GPU error by avoiding loading the `region_fl` model, #199
|
||||
* XML output: encoding should be `utf-8`, not `utf8`, #196, #197
|
||||
|
||||
## [0.6.0rc1] - 2025-10-10
|
||||
|
||||
Fixed:
|
||||
|
||||
* continue processing when no columns detected but text regions exist
|
||||
* convert marginalia to main text if no main text is present
|
||||
* reset deskewing angle to 0° when text covers <30% image area and detected angle >45°
|
||||
* :fire: polygons: avoid invalid paths (use `Polygon.buffer()` instead of dilation etc.)
|
||||
* `return_boxes_of_images_by_order_of_reading_new`: avoid Numpy.dtype mismatch, simplify
|
||||
* `return_boxes_of_images_by_order_of_reading_new`: log any exceptions instead of ignoring
|
||||
* `filter_contours_without_textline_inside`: avoid removing from duplicate lists twice
|
||||
* `get_marginals`: exit early if no peaks found to avoid spurious overlap mask
|
||||
* `get_smallest_skew`: after shifting search range of rotation angle, use overall best result
|
||||
* Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR)
|
||||
* OCR: re-instate missing methods and fix `utils_ocr` function calls
|
||||
* mbreorder/enhancement CLIs: missing imports
|
||||
* :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`), f458e3e
|
||||
* tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate`
|
||||
(so CUDA memory gets freed between tests if running on GPU)
|
||||
|
||||
Added:
|
||||
* :fire: `layout` CLI: new option `--model_version` to override default choices
|
||||
* test coverage for OCR options in `layout`
|
||||
* test coverage for table detection in `layout`
|
||||
* CI linting with ruff
|
||||
|
||||
Changed:
|
||||
|
||||
* polygons: slightly widen for regions and lines, increase for separators
|
||||
* various refactorings, some code style and identifier improvements
|
||||
* deskewing/multiprocessing: switch back to ProcessPoolExecutor (faster),
|
||||
but use shared memory if necessary, and switch back from `loky` to stdlib,
|
||||
and shutdown in `del()` instead of `atexit`
|
||||
* :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too
|
||||
* OCR: allow running `-tr` without `-fl`, too
|
||||
* :fire: writer: use `@type='heading'` instead of `'header'` for headings
|
||||
* :fire: performance gains via refactoring (simplification, less copy-code, vectorization,
|
||||
avoiding unused calculations, avoiding unnecessary 3-channel image operations)
|
||||
* :fire: heuristic reading order detection: many improvements
|
||||
- contour vs splitter box matching:
|
||||
* contour must be contained in box exactly instead of heuristics
|
||||
* make fallback center matching, center must be contained in box
|
||||
- original vs deskewed contour matching:
|
||||
* same min-area filter on both sides
|
||||
* similar area score in addition to center proximity
|
||||
* avoid duplicate and missing mappings by allowing N:M
|
||||
matches and splitting+joining where necessary
|
||||
* CI: update+improve model caching
|
||||
|
||||
|
||||
## [0.5.0] - 2025-09-26
|
||||
|
||||
Fixed:
|
||||
|
||||
* restoring the contour in the original image caused an error due to an empty tuple, #154
|
||||
* removed NumPy warnings when calculating sigma and mean (fixed issue #158)
|
||||
* fixed bug in `separate_lines.py`, #124
|
||||
* Drop capitals are now handled separately from their corresponding textline
|
||||
* Marginals are now divided into left and right. Their reading order is written first for left marginals, then for right marginals, and within each side from top to bottom
|
||||
* Added a new page extraction model. Instead of bounding boxes, it outputs page contours in the XML file, improving results for skewed pages
|
||||
* Improved reading order for cases where a textline is segmented into multiple smaller textlines
|
||||
|
||||
Changed:
|
||||
|
||||
* CLIs: read only allowed filename suffixes (image or XML) with `--dir_in`
|
||||
* CLIs: make all output option required, and `-i` / `-di` required but mutually exclusive
|
||||
* ocr CLI: drop redundant `-brb` in favour of just `-dib`
|
||||
* APIs: move all input/output path options from class (kwarg and attribute) to `run` kwarg
|
||||
* layout textlines: polygonal also without `-cl`
|
||||
|
||||
Added:
|
||||
|
||||
* `eynollah machine-based-reading-order` CLI to run reading order detection, #175
|
||||
* `eynollah enhancement` CLI to run image enhancement, #175
|
||||
* Improved models for page extraction and reading order detection, #175
|
||||
* For the lightweight version (layout and textline detection), thresholds are now assigned to the artificial class. Users can apply these thresholds to improve detection of isolated textlines and regions. To counteract the drawback of thresholding, the skeleton of the artificial class is used to keep lines as thin as possible (resolved issues #163 and #161)
|
||||
* Added and integrated trained CNN-RNN OCR models
|
||||
* Added and integrated a trained TrOCR model
|
||||
* Improved OCR detection to support vertical and curved textlines
|
||||
* Introduced a new machine-based reading order model with rotation augmentation
|
||||
* Optimized reading order speed by clustering text regions that belong to the same block, maintaining top-to-bottom order
|
||||
* Implemented text merging across textlines based on hyphenation when a line ends with a hyphen
|
||||
* Integrated image enhancement as a separate use case
|
||||
* Added reading order functionality on the layout level as a separate use case
|
||||
* CNN-RNN OCR models provide confidence scores for predictions
|
||||
* Added OCR visualization: predicted OCR can be overlaid on an image of the same size as the input
|
||||
* Introduced a threshold value for CNN-RNN OCR models, allowing users to filter out low-confidence textline predictions
|
||||
* For OCR, users can specify a single model by name instead of always using the default model
|
||||
* Under the OCR use case, if Ground Truth XMLs and images are available, textline image and corresponding text extraction can now be performed
|
||||
|
||||
Merged PRs:
|
||||
|
||||
* better machine based reading order + layout and textline + ocr by @vahidrezanezhad in https://github.com/qurator-spk/eynollah/pull/175
|
||||
* CI: pypi by @kba in https://github.com/qurator-spk/eynollah/pull/154
|
||||
* CI: Use most recent actions/setup-python@v5 by @kba in https://github.com/qurator-spk/eynollah/pull/157
|
||||
* update docker by @bertsky in https://github.com/qurator-spk/eynollah/pull/159
|
||||
* Ocrd fixes by @kba in https://github.com/qurator-spk/eynollah/pull/167
|
||||
* Updating readme for eynollah use cases cli by @kba in https://github.com/qurator-spk/eynollah/pull/166
|
||||
* OCR-D processor: expose reading_order_machine_based by @bertsky in https://github.com/qurator-spk/eynollah/pull/171
|
||||
* prepare release v0.5.0: fix logging by @bertsky in https://github.com/qurator-spk/eynollah/pull/180
|
||||
* mb_ro_on_layout: remove copy-pasta code not actually used by @kba in https://github.com/qurator-spk/eynollah/pull/181
|
||||
* prepare release v0.5.0: improve CLI docstring, refactor I/O path options from class to run kwargs, increase test coverage by @bertsky in https://github.com/qurator-spk/eynollah/pull/182
|
||||
* prepare release v0.5.0: fix for OCR doit subtest by @bertsky in https://github.com/qurator-spk/eynollah/pull/183
|
||||
* Prepare release v0.5.0 by @kba in https://github.com/qurator-spk/eynollah/pull/178
|
||||
* updating eynollah README, how to use it for use cases by @vahidrezanezhad in https://github.com/qurator-spk/eynollah/pull/156
|
||||
* add feedback to command line interface by @michalbubula in https://github.com/qurator-spk/eynollah/pull/170
|
||||
|
||||
## [0.4.0] - 2025-04-07
|
||||
|
||||
|
@ -195,6 +301,8 @@ Fixed:
|
|||
Initial release
|
||||
|
||||
<!-- link-labels -->
|
||||
[0.6.0rc2]: ../../compare/v0.6.0rc2...v0.6.0rc1
|
||||
[0.6.0rc1]: ../../compare/v0.6.0rc1...v0.5.0
|
||||
[0.5.0]: ../../compare/v0.5.0...v0.4.0
|
||||
[0.4.0]: ../../compare/v0.4.0...v0.3.1
|
||||
[0.3.1]: ../../compare/v0.3.1...v0.3.0
|
||||
|
|
|
@ -40,6 +40,8 @@ RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename
|
|||
RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json
|
||||
# install everything and reduce image size
|
||||
RUN make install EXTRAS=OCR && rm -rf /build/eynollah
|
||||
# fixup for broken cuDNN installation (Torch pulls in 8.5.0, which is incompatible with Tensorflow)
|
||||
RUN pip install nvidia-cudnn-cu11==8.6.0.163
|
||||
# smoke test
|
||||
RUN eynollah --help
|
||||
|
||||
|
|
72
Makefile
72
Makefile
|
@ -13,12 +13,18 @@ DOCKER ?= docker
|
|||
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
|
||||
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
|
||||
SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
|
||||
SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
|
||||
SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)
|
||||
|
||||
BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
|
||||
BIN_MODELFILE = $(notdir $(BIN_MODEL))
|
||||
BIN_MODELNAME := default-2021-03-09
|
||||
|
||||
OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1
|
||||
OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
|
||||
OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
|
||||
OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
|
||||
|
||||
PYTEST_ARGS ?= -vv
|
||||
PYTEST_ARGS ?= -vv --isolate
|
||||
|
||||
# BEGIN-EVAL makefile-parser --make-help Makefile
|
||||
|
||||
|
@ -31,7 +37,8 @@ help:
|
|||
@echo " install Install package with pip"
|
||||
@echo " install-dev Install editable with pip"
|
||||
@echo " deps-test Install test dependencies with pip"
|
||||
@echo " models Download and extract models to $(CURDIR)/models_layout_v0_5_0"
|
||||
@echo " models Download and extract models to $(CURDIR):"
|
||||
@echo " $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)"
|
||||
@echo " smoke-test Run simple CLI check"
|
||||
@echo " ocrd-test Run OCR-D CLI check"
|
||||
@echo " test Run unit tests"
|
||||
|
@ -42,33 +49,32 @@ help:
|
|||
@echo " PYTEST_ARGS pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests. [$(PYTEST_ARGS)]"
|
||||
@echo " SEG_MODEL URL of 'models' archive to download for segmentation 'test' [$(SEG_MODEL)]"
|
||||
@echo " BIN_MODEL URL of 'models' archive to download for binarization 'test' [$(BIN_MODEL)]"
|
||||
@echo " OCR_MODEL URL of 'models' archive to download for OCR 'test' [$(OCR_MODEL)]"
|
||||
@echo ""
|
||||
|
||||
# END-EVAL
|
||||
|
||||
|
||||
# Download and extract models to $(PWD)/models_layout_v0_5_0
|
||||
models: models_layout_v0_5_0 models_ocr_v0_5_0 default-2021-03-09
|
||||
models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
|
||||
|
||||
models_layout_v0_5_0: models_layout_v0_5_0.tar.gz
|
||||
tar zxf models_layout_v0_5_0.tar.gz
|
||||
# do not download these files if we already have the directories
|
||||
.INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE)
|
||||
|
||||
models_layout_v0_5_0.tar.gz:
|
||||
$(BIN_MODELFILE):
|
||||
wget -O $@ $(BIN_MODEL)
|
||||
$(SEG_MODELFILE):
|
||||
wget -O $@ $(SEG_MODEL)
|
||||
|
||||
models_ocr_v0_5_0: models_ocr_v0_5_0.tar.gz
|
||||
tar zxf models_ocr_v0_5_0.tar.gz
|
||||
|
||||
models_ocr_v0_5_0.tar.gz:
|
||||
$(OCR_MODELFILE):
|
||||
wget -O $@ $(OCR_MODEL)
|
||||
|
||||
default-2021-03-09: $(notdir $(BIN_MODEL))
|
||||
unzip $(notdir $(BIN_MODEL))
|
||||
$(BIN_MODELNAME): $(BIN_MODELFILE)
|
||||
mkdir $@
|
||||
mv $(basename $(notdir $(BIN_MODEL))) $@
|
||||
|
||||
$(notdir $(BIN_MODEL)):
|
||||
wget $(BIN_MODEL)
|
||||
unzip -d $@ $<
|
||||
$(SEG_MODELNAME): $(SEG_MODELFILE)
|
||||
tar zxf $<
|
||||
$(OCR_MODELNAME): $(OCR_MODELFILE)
|
||||
tar zxf $<
|
||||
|
||||
build:
|
||||
$(PIP) install build
|
||||
|
@ -82,28 +88,34 @@ install:
|
|||
install-dev:
|
||||
$(PIP) install -e .$(and $(EXTRAS),[$(EXTRAS)])
|
||||
|
||||
deps-test: models_layout_v0_5_0
|
||||
ifeq (OCR,$(findstring OCR, $(EXTRAS)))
|
||||
deps-test: $(OCR_MODELNAME)
|
||||
endif
|
||||
deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
|
||||
$(PIP) install -r requirements-test.txt
|
||||
ifeq (OCR,$(findstring OCR, $(EXTRAS)))
|
||||
ln -rs $(OCR_MODELNAME)/* $(SEG_MODELNAME)/
|
||||
endif
|
||||
|
||||
smoke-test: TMPDIR != mktemp -d
|
||||
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
||||
# layout analysis:
|
||||
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0
|
||||
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
|
||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
||||
# layout, directory mode (skip one, add one):
|
||||
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0
|
||||
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
|
||||
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
|
||||
# mbreorder, directory mode (overwrite):
|
||||
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0
|
||||
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
|
||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
|
||||
# binarize:
|
||||
eynollah binarization -m $(CURDIR)/default-2021-03-09 -i $< -o $(TMPDIR)/$(<F)
|
||||
eynollah binarization -m $(CURDIR)/$(BIN_MODELNAME) -i $< -o $(TMPDIR)/$(<F)
|
||||
test -s $(TMPDIR)/$(<F)
|
||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||
# enhance:
|
||||
eynollah enhancement -m $(CURDIR)/models_layout_v0_5_0 -sos -i $< -o $(TMPDIR) -O
|
||||
eynollah enhancement -m $(CURDIR)/$(SEG_MODELNAME) -sos -i $< -o $(TMPDIR) -O
|
||||
test -s $(TMPDIR)/$(<F)
|
||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||
$(RM) -r $(TMPDIR)
|
||||
|
@ -114,18 +126,18 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
|||
cp $< $(TMPDIR)
|
||||
ocrd workspace -d $(TMPDIR) init
|
||||
ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F)
|
||||
ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/models_layout_v0_5_0
|
||||
ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/$(SEG_MODELNAME)
|
||||
result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \
|
||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \
|
||||
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result
|
||||
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/default-2021-03-09
|
||||
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/default-2021-03-09 -P operation_level region
|
||||
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/$(BIN_MODELNAME)
|
||||
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/$(BIN_MODELNAME) -P operation_level region
|
||||
$(RM) -r $(TMPDIR)
|
||||
|
||||
# Run unit tests
|
||||
test: export MODELS_LAYOUT=$(CURDIR)/models_layout_v0_5_0
|
||||
test: export MODELS_OCR=$(CURDIR)/models_ocr_v0_5_0
|
||||
test: export MODELS_BIN=$(CURDIR)/default-2021-03-09
|
||||
test: export MODELS_LAYOUT=$(CURDIR)/$(SEG_MODELNAME)
|
||||
test: export MODELS_OCR=$(CURDIR)/$(OCR_MODELNAME)
|
||||
test: export MODELS_BIN=$(CURDIR)/$(BIN_MODELNAME)
|
||||
test:
|
||||
$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)
|
||||
|
||||
|
|
|
@ -54,3 +54,21 @@ where = ["src"]
|
|||
[tool.coverage.run]
|
||||
branch = true
|
||||
source = ["eynollah"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
|
||||
[tool.ruff.lint]
|
||||
ignore = [
|
||||
# disable unused imports
|
||||
"F401",
|
||||
# disable import order
|
||||
"E402",
|
||||
# disable unused variables
|
||||
"F841",
|
||||
# disable bare except
|
||||
"E722",
|
||||
]
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "preserve"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
pytest
|
||||
pytest-subtests
|
||||
pytest-isolate
|
||||
coverage[toml]
|
||||
black
|
||||
|
|
|
@ -5,5 +5,4 @@ scikit-learn >= 0.23.2
|
|||
tensorflow < 2.13
|
||||
numba <= 0.58.1
|
||||
scikit-image
|
||||
loky
|
||||
biopython
|
||||
|
|
|
@ -202,6 +202,13 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
|
|||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--model_version",
|
||||
"-mv",
|
||||
help="override default versions of model categories",
|
||||
type=(str, str),
|
||||
multiple=True,
|
||||
)
|
||||
@click.option(
|
||||
"--save_images",
|
||||
"-si",
|
||||
|
@ -373,7 +380,7 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
|
|||
help="Setup a basic console logger",
|
||||
)
|
||||
|
||||
def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
|
||||
def layout(image, out, overwrite, dir_in, model, model_version, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
|
||||
if setup_logging:
|
||||
console_handler = logging.StreamHandler(sys.stdout)
|
||||
console_handler.setLevel(logging.INFO)
|
||||
|
@ -404,6 +411,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
|||
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
eynollah = Eynollah(
|
||||
model,
|
||||
model_versions=model_version,
|
||||
extract_only_images=extract_only_images,
|
||||
enable_plotting=enable_plotting,
|
||||
allow_enhancement=allow_enhancement,
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -6,23 +6,23 @@ from logging import Logger
|
|||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
import atexit
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from multiprocessing import cpu_count
|
||||
import gc
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from ocrd_utils import getLogger, tf_disable_interactive_logs
|
||||
import tensorflow as tf
|
||||
from skimage.morphology import skeletonize
|
||||
from tensorflow.keras.models import load_model
|
||||
|
||||
from .utils.resize import resize_image
|
||||
from .utils.pil_cv2 import pil2cv
|
||||
from .utils import (
|
||||
is_image_filename,
|
||||
crop_image_inside_box
|
||||
)
|
||||
from .eynollah import PatchEncoder, Patches
|
||||
|
||||
DPI_THRESHOLD = 298
|
||||
KERNEL = np.ones((5, 5), np.uint8)
|
||||
|
|
|
@ -6,25 +6,24 @@ from logging import Logger
|
|||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
import atexit
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from multiprocessing import cpu_count
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from ocrd_utils import getLogger
|
||||
import statistics
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import load_model
|
||||
from .utils.resize import resize_image
|
||||
|
||||
from .utils.resize import resize_image
|
||||
from .utils.contour import (
|
||||
find_new_features_of_contours,
|
||||
return_contours_of_image,
|
||||
return_parent_contours,
|
||||
)
|
||||
from .utils import is_xml_filename
|
||||
from .eynollah import PatchEncoder, Patches
|
||||
|
||||
DPI_THRESHOLD = 298
|
||||
KERNEL = np.ones((5, 5), np.uint8)
|
||||
|
@ -806,7 +805,7 @@ class machine_based_reading_order_on_layout:
|
|||
tree_xml.write(os.path.join(dir_out, file_name+'.xml'),
|
||||
xml_declaration=True,
|
||||
method='xml',
|
||||
encoding="utf8",
|
||||
encoding="utf-8",
|
||||
default_namespace=None)
|
||||
|
||||
#sys.exit()
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"version": "0.5.0",
|
||||
"version": "0.6.0rc2",
|
||||
"git_url": "https://github.com/qurator-spk/eynollah",
|
||||
"dockerhub": "ocrd/eynollah",
|
||||
"tools": {
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,7 +1,15 @@
|
|||
from typing import Sequence, Union
|
||||
from numbers import Number
|
||||
from functools import partial
|
||||
import itertools
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from shapely import geometry
|
||||
from scipy.sparse.csgraph import minimum_spanning_tree
|
||||
from shapely.geometry import Polygon, LineString
|
||||
from shapely.geometry.polygon import orient
|
||||
from shapely import set_precision
|
||||
from shapely.ops import unary_union, nearest_points
|
||||
|
||||
from .rotate import rotate_image, rotation_image_new
|
||||
|
||||
|
@ -28,38 +36,31 @@ def find_contours_mean_y_diff(contours_main):
|
|||
return np.mean(np.diff(np.sort(np.array(cy_main))))
|
||||
|
||||
def get_text_region_boxes_by_given_contours(contours):
|
||||
boxes = []
|
||||
contours_new = []
|
||||
for jj in range(len(contours)):
|
||||
box = cv2.boundingRect(contours[jj])
|
||||
boxes.append(box)
|
||||
contours_new.append(contours[jj])
|
||||
return [cv2.boundingRect(contour)
|
||||
for contour in contours]
|
||||
|
||||
return boxes, contours_new
|
||||
|
||||
def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area):
|
||||
def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
|
||||
found_polygons_early = []
|
||||
for jv,c in enumerate(contours):
|
||||
if len(c) < 3: # A polygon cannot have less than 3 points
|
||||
for jv, contour in enumerate(contours):
|
||||
if len(contour) < 3: # A polygon cannot have less than 3 points
|
||||
continue
|
||||
|
||||
polygon = geometry.Polygon([point[0] for point in c])
|
||||
polygon = contour2polygon(contour, dilate=dilate)
|
||||
area = polygon.area
|
||||
if (area >= min_area * np.prod(image.shape[:2]) and
|
||||
area <= max_area * np.prod(image.shape[:2]) and
|
||||
hierarchy[0][jv][3] == -1):
|
||||
found_polygons_early.append(np.array([[point]
|
||||
for point in polygon.exterior.coords], dtype=np.uint))
|
||||
found_polygons_early.append(polygon2contour(polygon))
|
||||
return found_polygons_early
|
||||
|
||||
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
|
||||
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
|
||||
found_polygons_early = []
|
||||
for jv,c in enumerate(contours):
|
||||
if len(c) < 3: # A polygon cannot have less than 3 points
|
||||
for jv, contour in enumerate(contours):
|
||||
if len(contour) < 3: # A polygon cannot have less than 3 points
|
||||
continue
|
||||
|
||||
polygon = geometry.Polygon([point[0] for point in c])
|
||||
# area = cv2.contourArea(c)
|
||||
polygon = contour2polygon(contour, dilate=dilate)
|
||||
# area = cv2.contourArea(contour)
|
||||
area = polygon.area
|
||||
##print(np.prod(thresh.shape[:2]))
|
||||
# Check that polygon has area greater than minimal area
|
||||
|
@ -68,66 +69,41 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
|
|||
area <= max_area * np.prod(image.shape[:2]) and
|
||||
# hierarchy[0][jv][3]==-1
|
||||
True):
|
||||
# print(c[0][0][1])
|
||||
found_polygons_early.append(np.array([[point]
|
||||
for point in polygon.exterior.coords], dtype=np.int32))
|
||||
# print(contour[0][0][1])
|
||||
found_polygons_early.append(polygon2contour(polygon))
|
||||
return found_polygons_early
|
||||
|
||||
def find_new_features_of_contours(contours_main):
|
||||
areas_main = np.array([cv2.contourArea(contours_main[j])
|
||||
for j in range(len(contours_main))])
|
||||
M_main = [cv2.moments(contours_main[j])
|
||||
for j in range(len(contours_main))]
|
||||
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32))
|
||||
for j in range(len(M_main))]
|
||||
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32))
|
||||
for j in range(len(M_main))]
|
||||
try:
|
||||
x_min_main = np.array([np.min(contours_main[j][:, 0, 0])
|
||||
for j in range(len(contours_main))])
|
||||
argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0])
|
||||
for j in range(len(contours_main))])
|
||||
x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0]
|
||||
for j in range(len(contours_main))])
|
||||
y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1]
|
||||
for j in range(len(contours_main))])
|
||||
x_max_main = np.array([np.max(contours_main[j][:, 0, 0])
|
||||
for j in range(len(contours_main))])
|
||||
y_min_main = np.array([np.min(contours_main[j][:, 0, 1])
|
||||
for j in range(len(contours_main))])
|
||||
y_max_main = np.array([np.max(contours_main[j][:, 0, 1])
|
||||
for j in range(len(contours_main))])
|
||||
except:
|
||||
x_min_main = np.array([np.min(contours_main[j][:, 0])
|
||||
for j in range(len(contours_main))])
|
||||
argmin_x_main = np.array([np.argmin(contours_main[j][:, 0])
|
||||
for j in range(len(contours_main))])
|
||||
x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0]
|
||||
for j in range(len(contours_main))])
|
||||
y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1]
|
||||
for j in range(len(contours_main))])
|
||||
x_max_main = np.array([np.max(contours_main[j][:, 0])
|
||||
for j in range(len(contours_main))])
|
||||
y_min_main = np.array([np.min(contours_main[j][:, 1])
|
||||
for j in range(len(contours_main))])
|
||||
y_max_main = np.array([np.max(contours_main[j][:, 1])
|
||||
for j in range(len(contours_main))])
|
||||
# dis_x=np.abs(x_max_main-x_min_main)
|
||||
def find_center_of_contours(contours):
|
||||
moments = [cv2.moments(contour) for contour in contours]
|
||||
cx = [feat["m10"] / (feat["m00"] + 1e-32)
|
||||
for feat in moments]
|
||||
cy = [feat["m01"] / (feat["m00"] + 1e-32)
|
||||
for feat in moments]
|
||||
return cx, cy
|
||||
|
||||
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
|
||||
def find_new_features_of_contours(contours):
|
||||
# areas = np.array([cv2.contourArea(contour) for contour in contours])
|
||||
cx, cy = find_center_of_contours(contours)
|
||||
slice_x = np.index_exp[:, 0, 0]
|
||||
slice_y = np.index_exp[:, 0, 1]
|
||||
if any(contour.ndim < 3 for contour in contours):
|
||||
slice_x = np.index_exp[:, 0]
|
||||
slice_y = np.index_exp[:, 1]
|
||||
x_min = np.array([np.min(contour[slice_x]) for contour in contours])
|
||||
x_max = np.array([np.max(contour[slice_x]) for contour in contours])
|
||||
y_min = np.array([np.min(contour[slice_y]) for contour in contours])
|
||||
y_max = np.array([np.max(contour[slice_y]) for contour in contours])
|
||||
# dis_x=np.abs(x_max-x_min)
|
||||
y_corr_x_min = np.array([contour[np.argmin(contour[slice_x])][slice_y[1:]]
|
||||
for contour in contours])
|
||||
|
||||
def find_features_of_contours(contours_main):
|
||||
areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
|
||||
M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
||||
cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))]
|
||||
cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))]
|
||||
x_min_main=np.array([np.min(contours_main[j][:,0,0]) for j in range(len(contours_main))])
|
||||
x_max_main=np.array([np.max(contours_main[j][:,0,0]) for j in range(len(contours_main))])
|
||||
return cx, cy, x_min, x_max, y_min, y_max, y_corr_x_min
|
||||
|
||||
y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))])
|
||||
y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))])
|
||||
def find_features_of_contours(contours):
|
||||
y_min = np.array([np.min(contour[:,0,1]) for contour in contours])
|
||||
y_max = np.array([np.max(contour[:,0,1]) for contour in contours])
|
||||
|
||||
return y_min_main, y_max_main
|
||||
return y_min, y_max
|
||||
|
||||
def return_parent_contours(contours, hierarchy):
|
||||
contours_parent = [contours[i]
|
||||
|
@ -135,16 +111,13 @@ def return_parent_contours(contours, hierarchy):
|
|||
if hierarchy[0][i][3] == -1]
|
||||
return contours_parent
|
||||
|
||||
def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
|
||||
def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
|
||||
# pixels of images are identified by 5
|
||||
if len(region_pre_p.shape) == 3:
|
||||
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
||||
if region_pre_p.ndim == 3:
|
||||
cnts_images = (region_pre_p[:, :, 0] == label) * 1
|
||||
else:
|
||||
cnts_images = (region_pre_p[:, :] == pixel) * 1
|
||||
cnts_images = cnts_images.astype(np.uint8)
|
||||
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
|
||||
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
cnts_images = (region_pre_p[:, :] == label) * 1
|
||||
_, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0)
|
||||
|
||||
contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
contours_imgs = return_parent_contours(contours_imgs, hierarchy)
|
||||
|
@ -153,13 +126,11 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
|
|||
return contours_imgs
|
||||
|
||||
def do_work_of_contours_in_image(contour, index_r_con, img, slope_first):
|
||||
img_copy = np.zeros(img.shape)
|
||||
img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1))
|
||||
img_copy = np.zeros(img.shape[:2], dtype=np.uint8)
|
||||
img_copy = cv2.fillPoly(img_copy, pts=[contour], color=1)
|
||||
|
||||
img_copy = rotation_image_new(img_copy, -slope_first)
|
||||
img_copy = img_copy.astype(np.uint8)
|
||||
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
_, thresh = cv2.threshold(img_copy, 0, 255, 0)
|
||||
|
||||
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
|
@ -182,8 +153,8 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
|||
cnts_org = []
|
||||
# print(cnts,'cnts')
|
||||
for i in range(len(cnts)):
|
||||
img_copy = np.zeros(img.shape)
|
||||
img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1))
|
||||
img_copy = np.zeros(img.shape[:2], dtype=np.uint8)
|
||||
img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=1)
|
||||
|
||||
# plt.imshow(img_copy)
|
||||
# plt.show()
|
||||
|
@ -194,9 +165,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
|||
# plt.imshow(img_copy)
|
||||
# plt.show()
|
||||
|
||||
img_copy = img_copy.astype(np.uint8)
|
||||
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
_, thresh = cv2.threshold(img_copy, 0, 255, 0)
|
||||
|
||||
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
|
||||
|
@ -213,12 +182,11 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first):
|
|||
interpolation=cv2.INTER_NEAREST)
|
||||
cnts_org = []
|
||||
for cnt in cnts:
|
||||
img_copy = np.zeros(img.shape)
|
||||
img_copy = cv2.fillPoly(img_copy, pts=[(cnt / zoom).astype(int)], color=(1, 1, 1))
|
||||
img_copy = np.zeros(img.shape[:2], dtype=np.uint8)
|
||||
img_copy = cv2.fillPoly(img_copy, pts=[cnt // zoom], color=1)
|
||||
|
||||
img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8)
|
||||
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
_, thresh = cv2.threshold(img_copy, 0, 255, 0)
|
||||
|
||||
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
|
||||
|
@ -228,14 +196,13 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first):
|
|||
return cnts_org
|
||||
|
||||
def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first, confidence_matrix):
|
||||
img_copy = np.zeros(img.shape)
|
||||
img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1))
|
||||
confidence_matrix_mapped_with_contour = confidence_matrix * img_copy[:,:,0]
|
||||
confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy[:,:,0]))
|
||||
img_copy = np.zeros(img.shape[:2], dtype=np.uint8)
|
||||
img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=1)
|
||||
confidence_matrix_mapped_with_contour = confidence_matrix * img_copy
|
||||
confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy))
|
||||
|
||||
img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8)
|
||||
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
_, thresh = cv2.threshold(img_copy, 0, 255, 0)
|
||||
|
||||
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
if len(cont_int)==0:
|
||||
|
@ -246,34 +213,27 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
|
|||
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
|
||||
return cont_int[0], index_r_con, confidence_contour
|
||||
|
||||
def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map):
|
||||
def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
|
||||
if not len(cnts):
|
||||
return [], []
|
||||
return []
|
||||
|
||||
confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
|
||||
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
|
||||
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
|
||||
#cnts = cnts/2
|
||||
cnts = [(i/6).astype(int) for i in cnts]
|
||||
results = map(partial(do_back_rotation_and_get_cnt_back,
|
||||
img=img,
|
||||
slope_first=slope_first,
|
||||
confidence_matrix=confidence_matrix,
|
||||
),
|
||||
cnts, range(len(cnts)))
|
||||
contours, indexes, conf_contours = tuple(zip(*results))
|
||||
return [i*6 for i in contours], list(conf_contours)
|
||||
confidence_matrix = cv2.resize(confidence_matrix,
|
||||
(img.shape[1] // 6, img.shape[0] // 6),
|
||||
interpolation=cv2.INTER_NEAREST)
|
||||
confs = []
|
||||
for cnt in cnts:
|
||||
cnt_mask = np.zeros(confidence_matrix.shape)
|
||||
cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0)
|
||||
confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
|
||||
return confs
|
||||
|
||||
def return_contours_of_interested_textline(region_pre_p, pixel):
|
||||
def return_contours_of_interested_textline(region_pre_p, label):
|
||||
# pixels of images are identified by 5
|
||||
if len(region_pre_p.shape) == 3:
|
||||
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
||||
if region_pre_p.ndim == 3:
|
||||
cnts_images = (region_pre_p[:, :, 0] == label) * 1
|
||||
else:
|
||||
cnts_images = (region_pre_p[:, :] == pixel) * 1
|
||||
cnts_images = cnts_images.astype(np.uint8)
|
||||
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
|
||||
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
cnts_images = (region_pre_p[:, :] == label) * 1
|
||||
_, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0)
|
||||
contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
contours_imgs = return_parent_contours(contours_imgs, hierarchy)
|
||||
|
@ -283,51 +243,123 @@ def return_contours_of_interested_textline(region_pre_p, pixel):
|
|||
|
||||
def return_contours_of_image(image):
|
||||
if len(image.shape) == 2:
|
||||
image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
|
||||
image = image.astype(np.uint8)
|
||||
imgray = image
|
||||
else:
|
||||
image = image.astype(np.uint8)
|
||||
imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
_, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
return contours, hierarchy
|
||||
|
||||
def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
|
||||
# pixels of images are identified by 5
|
||||
if len(region_pre_p.shape) == 3:
|
||||
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
||||
else:
|
||||
cnts_images = (region_pre_p[:, :] == pixel) * 1
|
||||
cnts_images = cnts_images.astype(np.uint8)
|
||||
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
|
||||
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
def dilate_textline_contours(all_found_textline_polygons):
    """Dilate every textline contour of every region.

    Each contour is converted to a polygon with ``contour2polygon`` using
    ``dilate=6`` and converted back with ``polygon2contour``.

    Args:
        all_found_textline_polygons: nested sequence, one inner sequence of
            textline contours per region.

    Returns:
        A list of lists with the same nesting, holding the dilated contours.
    """
    return [[polygon2contour(contour2polygon(contour, dilate=6))
             for contour in region]
            for region in all_found_textline_polygons]
|
||||
|
||||
contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
contours_imgs = return_parent_contours(contours_imgs, hierarchy)
|
||||
contours_imgs = filter_contours_area_of_image_tables(
|
||||
thresh, contours_imgs, hierarchy, max_area=1, min_area=min_size)
|
||||
def dilate_textregion_contours(all_found_textline_polygons):
    """Dilate every contour of a flat sequence of contours.

    Each contour is converted to a polygon with ``contour2polygon`` using
    ``dilate=6`` and converted back with ``polygon2contour``.

    Args:
        all_found_textline_polygons: flat sequence of contours (despite the
            parameter name, no per-region nesting is expected here).

    Returns:
        A list with one dilated contour per input contour.
    """
    return [polygon2contour(contour2polygon(contour, dilate=6))
            for contour in all_found_textline_polygons]
|
||||
|
||||
return contours_imgs
|
||||
def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
    """Convert a contour into a valid single polygon, optionally dilated.

    Args:
        contour: sequence of points where each entry wraps one coordinate
            pair (only ``point[0]`` of every entry is used).
        dilate: buffer distance applied to the polygon; a falsy value
            (the default ``0``) skips buffering.

    Returns:
        The polygon after passing through ``make_valid``.
    """
    polygon = Polygon([point[0] for point in contour])
    if dilate:
        polygon = polygon.buffer(dilate)
    if polygon.geom_type == 'GeometryCollection':
        # heterogeneous result: filter zero-area shapes (LineString, Point)
        polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0])
    if polygon.geom_type == 'MultiPolygon':
        # homogeneous result: connect the parts into a single polygon
        polygon = join_polygons(polygon.geoms)
    return make_valid(polygon)
|
||||
|
||||
def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
|
||||
# pixels of images are identified by 5
|
||||
if len(region_pre_p.shape) == 3:
|
||||
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
||||
else:
|
||||
cnts_images = (region_pre_p[:, :] == pixel) * 1
|
||||
cnts_images = cnts_images.astype(np.uint8)
|
||||
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
|
||||
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
def polygon2contour(polygon: "Polygon") -> np.ndarray:
    """Convert a polygon's exterior ring into an integer contour array.

    The closing point of the ring (a repeat of the first point) is dropped,
    coordinates are converted to integers and negative values are clipped
    to zero.

    Args:
        polygon: object exposing ``exterior.coords`` as a closed ring.

    Returns:
        Integer array of shape ``(N, 1, 2)``: one wrapped coordinate pair
        per ring point.
    """
    # a separate name avoids rebinding the ``polygon`` parameter to an array
    coords = np.array(polygon.exterior.coords[:-1], dtype=int)
    # np.maximum keeps the integer dtype, so no further cast is needed
    return np.maximum(0, coords)[:, np.newaxis]
|
||||
|
||||
contours_imgs = return_parent_contours(contours_imgs, hierarchy)
|
||||
contours_imgs = filter_contours_area_of_image_tables(
|
||||
thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area)
|
||||
def make_intersection(poly1, poly2):
    """Intersect two polygons and return the result as one valid polygon.

    Args:
        poly1: geometry providing ``intersection``.
        poly2: geometry to intersect ``poly1`` with.

    Returns:
        The intersection as a single polygon passed through ``make_valid``,
        or ``None`` when the intersection is empty or has zero area.

    Raises:
        AssertionError: if the post-processed intersection is not of
            geometry type ``Polygon``.
    """
    interp = poly1.intersection(poly2)
    # post-process
    if interp.is_empty or interp.area == 0.0:
        return None
    if interp.geom_type == 'GeometryCollection':
        # heterogeneous result: filter zero-area shapes (LineString, Point)
        interp = unary_union([geom for geom in interp.geoms if geom.area > 0])
    if interp.geom_type == 'MultiPolygon':
        # homogeneous result: connect the parts into a single polygon
        interp = join_polygons(interp.geoms)
    assert interp.geom_type == 'Polygon', interp.wkt
    interp = make_valid(interp)
    return interp
|
||||
|
||||
img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3))
|
||||
img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1))
|
||||
|
||||
return img_ret[:, :, 0]
|
||||
def make_valid(polygon: "Polygon") -> "Polygon":
    """Ensure a shapely Polygon is valid.

    Validity is sought by increasingly invasive steps, each of which is
    skipped as soon as the polygon is valid: re-arranging the starting
    point of the exterior ring, simplifying with growing tolerance, and
    finally enlarging (buffering) with growing distance.

    Args:
        polygon: the polygon to repair.

    Returns:
        A valid polygon; the input object itself when it already has
        integral coordinates and is valid.

    Raises:
        AssertionError: if no valid polygon could be produced.
    """
    def isint(x):
        return isinstance(x, int) or int(x) == x
    # make sure rounding does not invalidate
    if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0:
        polygon = Polygon(np.round(polygon.exterior.coords))
    points = list(polygon.exterior.coords[:-1])
    # try by re-arranging points
    for split in range(1, len(points)):
        if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
            break
        # simplification may not be possible (at all) due to ordering
        # in that case, try another starting point
        polygon = Polygon(points[-split:]+points[:-split])
    # try by simplification
    for tolerance in range(int(polygon.area + 1.5)):
        if polygon.is_valid:
            break
        # simplification may require a larger tolerance
        polygon = polygon.simplify(tolerance + 1)
    # try by enlarging
    for tolerance in range(1, int(polygon.area + 2.5)):
        if polygon.is_valid:
            break
        # enlargement may require a larger tolerance
        polygon = polygon.buffer(tolerance)
    assert polygon.is_valid, polygon.wkt
    return polygon
|
||||
|
||||
def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon:
    """Join several polygons into one by bridging their nearest points.

    Multi-part inputs are flattened into their parts, all parts are
    oriented equally, and parts are connected along a minimum spanning
    tree of their pairwise distances. Each tree edge is realised as a
    buffered line between the nearest points of the two parts.

    Args:
        polygons: polygons to join; ``MultiPolygon`` and
            ``GeometryCollection`` members are split into their parts.
        scale: controls the bridge width; bridges are buffered by
            ``max(1, scale/5)``.

    Returns:
        A single polygon. When there is only one part it is returned
        directly (after orientation); otherwise the union is snapped to a
        precision grid of 1.0 and repaired if that made it invalid.

    Raises:
        AssertionError: if the union (before or after snapping) is not of
            geometry type ``Polygon``.
    """
    # ensure input polygons are simply typed and all oriented equally
    polygons = [orient(poly)
                for poly in itertools.chain.from_iterable(
                        [poly.geoms
                         if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
                         else [poly]
                         for poly in polygons])]
    npoly = len(polygons)
    if npoly == 1:
        return polygons[0]
    # connect all polygons with minimal total distance (minimum spanning tree)
    pairs = itertools.combinations(range(npoly), 2)
    dists = np.zeros((npoly, npoly), dtype=float)
    for i, j in pairs:
        dist = polygons[i].distance(polygons[j])
        if dist < 1e-5:
            dist = 1e-5 # if pair merely touches, we still need to get an edge
        dists[i, j] = dist
        dists[j, i] = dist
    dists = minimum_spanning_tree(dists, overwrite=True)
    # add bridge polygons (where necessary)
    for prevp, nextp in zip(*dists.nonzero()):
        prevp = polygons[prevp]
        nextp = polygons[nextp]
        nearest = nearest_points(prevp, nextp)
        bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1)
        polygons.append(bridgep)
    jointp = unary_union(polygons)
    if jointp.geom_type == 'MultiPolygon':
        jointp = unary_union(jointp.geoms)
    assert jointp.geom_type == 'Polygon', jointp.wkt
    # follow-up calculations will necessarily be integer;
    # so anticipate rounding here and then ensure validity
    jointp2 = set_precision(jointp, 1.0)
    if jointp2.geom_type != 'Polygon' or not jointp2.is_valid:
        jointp2 = Polygon(np.round(jointp.exterior.coords))
        # NOTE(review): nesting reconstructed from a flattened listing;
        # confirm that make_valid belongs inside this branch
        jointp2 = make_valid(jointp2)
    assert jointp2.geom_type == 'Polygon', jointp2.wkt
    return jointp2
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import numpy as np
|
||||
import cv2
|
||||
from .contour import (
|
||||
find_center_of_contours,
|
||||
find_new_features_of_contours,
|
||||
return_contours_of_image,
|
||||
return_parent_contours,
|
||||
|
@ -22,8 +23,8 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
):
|
||||
# print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape')
|
||||
# print(all_found_textline_polygons[3])
|
||||
cx_m, cy_m, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent)
|
||||
cx_h, cy_h, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_h)
|
||||
cx_m, cy_m = find_center_of_contours(contours_only_text_parent)
|
||||
cx_h, cy_h = find_center_of_contours(contours_only_text_parent_h)
|
||||
cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contours(polygons_of_drop_capitals)
|
||||
|
||||
img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
|
||||
|
@ -89,9 +90,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
|
||||
|
||||
# print(region_final,'region_final')
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
try:
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -153,9 +154,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
|
||||
# areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))])
|
||||
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
try:
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -208,7 +209,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
|
||||
try:
|
||||
# print(all_found_textline_polygons[j_cont][0])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -261,7 +262,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
else:
|
||||
pass
|
||||
|
||||
##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
##cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
###print(all_box_coord[j_cont])
|
||||
###print(cx_t)
|
||||
###print(cy_t)
|
||||
|
@ -315,9 +316,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
|
||||
|
||||
# print(region_final,'region_final')
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
try:
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -375,12 +376,12 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
|
||||
# areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))])
|
||||
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
|
||||
# print(cx_t,'print')
|
||||
try:
|
||||
# print(all_found_textline_polygons[j_cont][0])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -453,7 +454,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
#####try:
|
||||
#####if len(contours_new_parent)==1:
|
||||
######print(all_found_textline_polygons[j_cont][0])
|
||||
#####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont])
|
||||
#####cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[j_cont])
|
||||
######print(all_box_coord[j_cont])
|
||||
######print(cx_t)
|
||||
######print(cy_t)
|
||||
|
|
|
@ -99,6 +99,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
|
|||
except:
|
||||
point_left=first_nonzero
|
||||
|
||||
if point_left == first_nonzero and point_right == last_nonzero:
|
||||
return text_regions
|
||||
|
||||
|
||||
if point_right>=mask_marginals.shape[1]:
|
||||
|
|
|
@ -17,9 +17,12 @@ from .contour import (
|
|||
return_contours_of_interested_textline,
|
||||
find_contours_mean_y_diff,
|
||||
)
|
||||
from .shm import share_ndarray, wrap_ndarray_shared
|
||||
from . import (
|
||||
find_num_col_deskew,
|
||||
crop_image_inside_box,
|
||||
box2rect,
|
||||
box2slice,
|
||||
)
|
||||
|
||||
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
||||
|
@ -64,7 +67,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
|||
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
|
||||
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
|
||||
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
|
||||
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
|
||||
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
||||
|
||||
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
||||
|
@ -75,11 +79,14 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
|||
|
||||
clusters_to_be_deleted = []
|
||||
if len(arg_diff_cluster) > 0:
|
||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
|
||||
clusters_to_be_deleted.append(
|
||||
arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
|
||||
for i in range(len(arg_diff_cluster) - 1):
|
||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
|
||||
arg_diff_cluster[i + 1] + 1])
|
||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
|
||||
clusters_to_be_deleted.append(
|
||||
arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
|
||||
arg_diff_cluster[i + 1] + 1])
|
||||
clusters_to_be_deleted.append(
|
||||
arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
|
||||
if len(clusters_to_be_deleted) > 0:
|
||||
peaks_new_extra = []
|
||||
for m in range(len(clusters_to_be_deleted)):
|
||||
|
@ -135,13 +142,12 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
|||
rotation_matrix)
|
||||
|
||||
def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||
(h, w) = img_patch.shape[:2]
|
||||
h, w = img_patch.shape[:2]
|
||||
center = (w // 2, h // 2)
|
||||
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
||||
x_d = M[0, 2]
|
||||
y_d = M[1, 2]
|
||||
thetha = thetha / 180. * np.pi
|
||||
rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
|
||||
rotation_matrix = M[:2, :2]
|
||||
contour_text_interest_copy = contour_text_interest.copy()
|
||||
|
||||
x_cont = contour_text_interest[:, 0, 0]
|
||||
|
@ -176,7 +182,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
|
||||
neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
|
||||
|
||||
arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
|
||||
y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
|
||||
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
|
||||
|
||||
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
|
||||
|
@ -236,7 +243,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
|
||||
try:
|
||||
neg_peaks_max=np.max(y_padded_smoothed[peaks])
|
||||
arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42]
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
|
||||
y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42]
|
||||
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
|
||||
|
||||
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
|
||||
|
@ -313,23 +321,36 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
|
||||
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_down =y_max_cont-1
|
||||
##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
|
||||
#point_up
|
||||
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./4.0)
|
||||
else:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_down =y_max_cont-1
|
||||
##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
|
||||
#point_up
|
||||
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./4.0)
|
||||
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(
|
||||
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
||||
1.4 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./2)
|
||||
else:
|
||||
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
|
||||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||
|
||||
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
|
||||
##+int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./4.0)
|
||||
else:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
|
||||
##+int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./4.0)
|
||||
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(
|
||||
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
||||
|
@ -338,7 +359,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
point_down_narrow = img_patch.shape[0] - 2
|
||||
|
||||
|
||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
|
||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
|
||||
True)
|
||||
for mj in range(len(xv))]
|
||||
distances = np.array(distances)
|
||||
|
||||
|
@ -465,7 +488,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
|
||||
|
||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
|
||||
True)
|
||||
for mj in range(len(xv))]
|
||||
distances = np.array(distances)
|
||||
|
||||
|
@ -540,7 +564,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down)
|
||||
|
||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
|
||||
True)
|
||||
for mj in range(len(xv))]
|
||||
distances = np.array(distances)
|
||||
|
||||
|
@ -610,7 +635,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|||
|
||||
neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg])
|
||||
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
|
||||
y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
|
||||
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
||||
|
||||
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
||||
|
@ -686,30 +712,50 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||
|
||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)
|
||||
##+int(dis_to_next_up*1./4.0)
|
||||
point_down = x_max_cont - 1
|
||||
##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
|
||||
#point_up
|
||||
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./4.0)
|
||||
else:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)
|
||||
##+int(dis_to_next_up*1./4.0)
|
||||
point_down = x_max_cont - 1
|
||||
##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
|
||||
#point_up
|
||||
# np.max(y_cont)
|
||||
#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./4.0)
|
||||
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./2)
|
||||
else:
|
||||
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
|
||||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||
|
||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
|
||||
##+int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./4.0)
|
||||
else:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
|
||||
##+int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./4.0)
|
||||
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
|
||||
###-int(dis_to_next_down*1./2)
|
||||
|
||||
if point_down_narrow >= img_patch.shape[0]:
|
||||
point_down_narrow = img_patch.shape[0] - 2
|
||||
|
||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))]
|
||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
|
||||
True)
|
||||
for mj in range(len(xv))]
|
||||
distances = np.array(distances)
|
||||
|
||||
xvinside = xv[distances >= 0]
|
||||
|
@ -798,7 +844,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|||
point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next)
|
||||
|
||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
|
||||
True)
|
||||
for mj in range(len(xv))]
|
||||
distances = np.array(distances)
|
||||
|
||||
|
@ -863,7 +910,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|||
point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down)
|
||||
|
||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
|
||||
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
|
||||
True)
|
||||
for mj in range(len(xv))]
|
||||
distances = np.array(distances)
|
||||
|
||||
|
@ -947,7 +995,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|||
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
|
||||
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
|
||||
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
|
||||
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
|
||||
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
||||
|
||||
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
||||
|
@ -960,8 +1009,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|||
if len(arg_diff_cluster) > 0:
|
||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
|
||||
for i in range(len(arg_diff_cluster) - 1):
|
||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1])
|
||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
|
||||
clusters_to_be_deleted.append(
|
||||
arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:
|
||||
arg_diff_cluster[i + 1] + 1])
|
||||
clusters_to_be_deleted.append(
|
||||
arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
|
||||
if len(clusters_to_be_deleted) > 0:
|
||||
peaks_new_extra = []
|
||||
for m in range(len(clusters_to_be_deleted)):
|
||||
|
@ -1011,7 +1063,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|||
try:
|
||||
neg_peaks_max = np.max(y_padded_smoothed[peaks])
|
||||
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
|
||||
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
|
||||
y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
|
||||
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
||||
|
||||
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
||||
|
@ -1174,7 +1227,8 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
|||
if diff_peaks[i] > cut_off:
|
||||
if not np.isnan(forest[np.argmin(z[forest])]):
|
||||
peaks_neg_true.append(forest[np.argmin(z[forest])])
|
||||
forest = [peaks_neg[i + 1]]
|
||||
forest = []
|
||||
forest.append(peaks_neg[i + 1])
|
||||
if i == (len(peaks_neg) - 1):
|
||||
if not np.isnan(forest[np.argmin(z[forest])]):
|
||||
peaks_neg_true.append(forest[np.argmin(z[forest])])
|
||||
|
@ -1194,7 +1248,8 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
|||
if diff_peaks_pos[i] > cut_off:
|
||||
if not np.isnan(forest[np.argmax(z[forest])]):
|
||||
peaks_pos_true.append(forest[np.argmax(z[forest])])
|
||||
forest = [peaks[i + 1]]
|
||||
forest = []
|
||||
forest.append(peaks[i + 1])
|
||||
if i == (len(peaks) - 1):
|
||||
if not np.isnan(forest[np.argmax(z[forest])]):
|
||||
peaks_pos_true.append(forest[np.argmax(z[forest])])
|
||||
|
@ -1246,19 +1301,16 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
|||
|
||||
def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines):
|
||||
kernel = np.ones((5, 5), np.uint8)
|
||||
pixel = 255
|
||||
label = 255
|
||||
min_area = 0
|
||||
max_area = 1
|
||||
|
||||
if len(img_patch.shape) == 3:
|
||||
cnts_images = (img_patch[:, :, 0] == pixel) * 1
|
||||
if img_patch.ndim == 3:
|
||||
cnts_images = (img_patch[:, :, 0] == label) * 1
|
||||
else:
|
||||
cnts_images = (img_patch[:, :] == pixel) * 1
|
||||
cnts_images = cnts_images.astype(np.uint8)
|
||||
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
|
||||
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
|
||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
cnts_images = (img_patch[:, :] == label) * 1
|
||||
_, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0)
|
||||
contours_imgs, hierarchy = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
contours_imgs = return_parent_contours(contours_imgs, hierarchy)
|
||||
contours_imgs = filter_contours_area_of_image_tables(thresh,
|
||||
|
@ -1266,14 +1318,12 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
|
|||
max_area=max_area, min_area=min_area)
|
||||
cont_final = []
|
||||
for i in range(len(contours_imgs)):
|
||||
img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3))
|
||||
img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255))
|
||||
img_contour = img_contour.astype(np.uint8)
|
||||
img_contour = np.zeros(cnts_images.shape[:2], dtype=np.uint8)
|
||||
img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=255)
|
||||
|
||||
img_contour = cv2.dilate(img_contour, kernel, iterations=4)
|
||||
imgrayrot = cv2.cvtColor(img_contour, cv2.COLOR_BGR2GRAY)
|
||||
_, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
|
||||
contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
_, threshrot = cv2.threshold(img_contour, 0, 255, 0)
|
||||
contours_text_rot, _ = cv2.findContours(threshrot.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
##contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
|
||||
##0]
|
||||
|
@ -1285,84 +1335,81 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
|
|||
|
||||
return None, cont_final
|
||||
|
||||
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
|
||||
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
|
||||
textline_mask = textline_mask.astype(np.uint8)
|
||||
def textline_contours_postprocessing(textline_mask, slope,
|
||||
contour_text_interest, box_ind,
|
||||
add_boxes_coor_into_textlines=False):
|
||||
textline_mask = textline_mask * 255
|
||||
kernel = np.ones((5, 5), np.uint8)
|
||||
textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel)
|
||||
textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel)
|
||||
textline_mask = cv2.erode(textline_mask, kernel, iterations=2)
|
||||
# textline_mask = cv2.erode(textline_mask, kernel, iterations=1)
|
||||
try:
|
||||
x_help = 30
|
||||
y_help = 2
|
||||
|
||||
textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help),
|
||||
textline_mask.shape[1] + int(2 * x_help), 3))
|
||||
textline_mask_help[y_help : y_help + textline_mask.shape[0],
|
||||
x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :])
|
||||
x_help = 30
|
||||
y_help = 2
|
||||
|
||||
dst = rotate_image(textline_mask_help, slope)
|
||||
dst = dst[:, :, 0]
|
||||
dst[dst != 0] = 1
|
||||
textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help),
|
||||
textline_mask.shape[1] + int(2 * x_help)))
|
||||
textline_mask_help[y_help : y_help + textline_mask.shape[0],
|
||||
x_help : x_help + textline_mask.shape[1]] = np.copy(textline_mask[:, :])
|
||||
|
||||
# if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3:
|
||||
# plt.imshow(dst)
|
||||
# plt.show()
|
||||
dst = rotate_image(textline_mask_help, slope)
|
||||
dst[dst != 0] = 1
|
||||
|
||||
contour_text_copy = contour_text_interest.copy()
|
||||
contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0]
|
||||
contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
|
||||
# if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3:
|
||||
# plt.imshow(dst)
|
||||
# plt.show()
|
||||
|
||||
img_contour = np.zeros((box_ind[3], box_ind[2], 3))
|
||||
img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255))
|
||||
contour_text_copy = contour_text_interest.copy()
|
||||
contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0]
|
||||
contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
|
||||
|
||||
img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help),
|
||||
img_contour.shape[1] + int(2 * x_help), 3))
|
||||
img_contour_help[y_help : y_help + img_contour.shape[0],
|
||||
x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :])
|
||||
img_contour = np.zeros((box_ind[3], box_ind[2]))
|
||||
img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=255)
|
||||
|
||||
img_contour_rot = rotate_image(img_contour_help, slope)
|
||||
img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help),
|
||||
img_contour.shape[1] + int(2 * x_help)))
|
||||
img_contour_help[y_help : y_help + img_contour.shape[0],
|
||||
x_help : x_help + img_contour.shape[1]] = np.copy(img_contour[:, :])
|
||||
|
||||
img_contour_rot = img_contour_rot.astype(np.uint8)
|
||||
# dst_help = dst_help.astype(np.uint8)
|
||||
imgrayrot = cv2.cvtColor(img_contour_rot, cv2.COLOR_BGR2GRAY)
|
||||
_, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
|
||||
contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
img_contour_rot = rotate_image(img_contour_help, slope)
|
||||
|
||||
len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))]
|
||||
ind_big_con = np.argmax(len_con_text_rot)
|
||||
_, threshrot = cv2.threshold(img_contour_rot, 0, 255, 0)
|
||||
contours_text_rot, _ = cv2.findContours(threshrot.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
if abs(slope) > 45:
|
||||
_, contours_rotated_clean = separate_lines_vertical_cont(
|
||||
textline_mask, contours_text_rot[ind_big_con], box_ind, slope,
|
||||
add_boxes_coor_into_textlines=add_boxes_coor_into_textlines)
|
||||
else:
|
||||
_, contours_rotated_clean = separate_lines(
|
||||
dst, contours_text_rot[ind_big_con], slope, x_help, y_help)
|
||||
except:
|
||||
contours_rotated_clean = []
|
||||
len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))]
|
||||
ind_big_con = np.argmax(len_con_text_rot)
|
||||
|
||||
if abs(slope) > 45:
|
||||
_, contours_rotated_clean = separate_lines_vertical_cont(
|
||||
textline_mask, contours_text_rot[ind_big_con], box_ind, slope,
|
||||
add_boxes_coor_into_textlines=add_boxes_coor_into_textlines)
|
||||
else:
|
||||
_, contours_rotated_clean = separate_lines(
|
||||
dst, contours_text_rot[ind_big_con], slope, x_help, y_help)
|
||||
|
||||
return contours_rotated_clean
|
||||
|
||||
def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None):
|
||||
def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None):
|
||||
if logger is None:
|
||||
logger = getLogger(__package__)
|
||||
if not np.prod(img_crop.shape):
|
||||
return img_crop
|
||||
|
||||
if num_col == 1:
|
||||
num_patches = int(img_path.shape[1] / 200.0)
|
||||
num_patches = int(img_crop.shape[1] / 200.0)
|
||||
else:
|
||||
num_patches = int(img_path.shape[1] / 140.0)
|
||||
# num_patches=int(img_path.shape[1]/200.)
|
||||
num_patches = int(img_crop.shape[1] / 140.0)
|
||||
# num_patches=int(img_crop.shape[1]/200.)
|
||||
if num_patches == 0:
|
||||
num_patches = 1
|
||||
|
||||
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
|
||||
img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
|
||||
|
||||
# plt.imshow(img_patch_ineterst)
|
||||
# plt.imshow(img_patch_interest)
|
||||
# plt.show()
|
||||
|
||||
length_x = int(img_path.shape[1] / float(num_patches))
|
||||
length_x = int(img_crop.shape[1] / float(num_patches))
|
||||
# margin = int(0.04 * length_x) just recently this was changed because it break lines into 2
|
||||
margin = int(0.04 * length_x)
|
||||
# if margin<=4:
|
||||
|
@ -1370,7 +1417,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
|||
# margin=0
|
||||
|
||||
width_mid = length_x - 2 * margin
|
||||
nxf = img_path.shape[1] / float(width_mid)
|
||||
nxf = img_crop.shape[1] / float(width_mid)
|
||||
|
||||
if nxf > int(nxf):
|
||||
nxf = int(nxf) + 1
|
||||
|
@ -1386,12 +1433,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
|||
index_x_d = i * width_mid
|
||||
index_x_u = index_x_d + length_x
|
||||
|
||||
if index_x_u > img_path.shape[1]:
|
||||
index_x_u = img_path.shape[1]
|
||||
index_x_d = img_path.shape[1] - length_x
|
||||
if index_x_u > img_crop.shape[1]:
|
||||
index_x_u = img_crop.shape[1]
|
||||
index_x_d = img_crop.shape[1] - length_x
|
||||
|
||||
# img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
|
||||
img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
|
||||
img_xline = img_patch_interest[:, index_x_d:index_x_u]
|
||||
|
||||
try:
|
||||
assert img_xline.any()
|
||||
|
@ -1407,9 +1454,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
|||
img_line_rotated = rotate_image(img_xline, slope_xline)
|
||||
img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
|
||||
|
||||
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
|
||||
img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
|
||||
|
||||
img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape)
|
||||
img_patch_interest_revised = np.zeros(img_patch_interest.shape)
|
||||
|
||||
for i in range(nxf):
|
||||
if i == 0:
|
||||
|
@ -1419,18 +1466,18 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
|||
index_x_d = i * width_mid
|
||||
index_x_u = index_x_d + length_x
|
||||
|
||||
if index_x_u > img_path.shape[1]:
|
||||
index_x_u = img_path.shape[1]
|
||||
index_x_d = img_path.shape[1] - length_x
|
||||
if index_x_u > img_crop.shape[1]:
|
||||
index_x_u = img_crop.shape[1]
|
||||
index_x_d = img_crop.shape[1] - length_x
|
||||
|
||||
img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
|
||||
img_xline = img_patch_interest[:, index_x_d:index_x_u]
|
||||
|
||||
img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
|
||||
img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0]
|
||||
|
||||
img_resized = np.zeros((int(img_int.shape[0] * 1.2), int(img_int.shape[1] * 3)))
|
||||
img_resized[int(img_int.shape[0] * 0.1): int(img_int.shape[0] * 0.1) + img_int.shape[0],
|
||||
int(img_int.shape[1] * 1.0): int(img_int.shape[1] * 1.0) + img_int.shape[1]] = img_int[:, :]
|
||||
img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3))))
|
||||
img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0],
|
||||
int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] = img_int[:, :]
|
||||
# plt.imshow(img_xline)
|
||||
# plt.show()
|
||||
img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i])
|
||||
|
@ -1442,15 +1489,16 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
|||
img_patch_separated_returned[:, :][img_patch_separated_returned[:, :] != 0] = 1
|
||||
|
||||
img_patch_separated_returned_true_size = img_patch_separated_returned[
|
||||
int(img_int.shape[0] * 0.1): int(img_int.shape[0] * 0.1) + img_int.shape[0],
|
||||
int(img_int.shape[1] * 1.0): int(img_int.shape[1] * 1.0) + img_int.shape[1]]
|
||||
int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0],
|
||||
int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]]
|
||||
|
||||
img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
|
||||
img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
|
||||
img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
|
||||
|
||||
return img_patch_ineterst_revised
|
||||
return img_patch_interest_revised
|
||||
|
||||
def do_image_rotation(angle, img, sigma_des, logger=None):
|
||||
@wrap_ndarray_shared(kw='img')
|
||||
def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None):
|
||||
if logger is None:
|
||||
logger = getLogger(__package__)
|
||||
img_rot = rotate_image(img, angle)
|
||||
|
@ -1463,7 +1511,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None):
|
|||
return var
|
||||
|
||||
def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
|
||||
main_page=False, logger=None, plotter=None, map=map):
|
||||
main_page=False, logger=None, plotter=None, map=None):
|
||||
if main_page and plotter:
|
||||
plotter.save_plot_of_textline_density(img_patch_org)
|
||||
|
||||
|
@ -1471,165 +1519,81 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
|
|||
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
|
||||
|
||||
max_shape=np.max(img_int.shape)
|
||||
img_resized=np.zeros((int(max_shape * 1.1) , int(max_shape * 1.1)))
|
||||
img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) ))
|
||||
|
||||
onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.)
|
||||
onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
|
||||
|
||||
#img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
|
||||
#img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
|
||||
#img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0],
|
||||
# int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
|
||||
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
|
||||
|
||||
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
|
||||
angles = np.array([-45, 0, 45, 90,])
|
||||
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
|
||||
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
|
||||
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
elif main_page:
|
||||
angles = np.array (list(np.linspace(-12, -7, int(n_tot_angles/4))) + list(np.linspace(-6, 6, n_tot_angles- 2* int(n_tot_angles/4))) + list(np.linspace(7, 12, int(n_tot_angles/4))))#np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
|
||||
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
#angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
|
||||
angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4),
|
||||
np.linspace(-6, 6, n_tot_angles // 2),
|
||||
np.linspace(7, 12, n_tot_angles // 4)))
|
||||
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
|
||||
early_slope_edge=11
|
||||
if abs(angle) > early_slope_edge:
|
||||
if angle < 0:
|
||||
angles = np.linspace(-90, -12, n_tot_angles)
|
||||
angles2 = np.linspace(-90, -12, n_tot_angles)
|
||||
else:
|
||||
angles = np.linspace(90, 12, n_tot_angles)
|
||||
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
angles2 = np.linspace(90, 12, n_tot_angles)
|
||||
angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
|
||||
if var2 > var:
|
||||
angle = angle2
|
||||
else:
|
||||
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
|
||||
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
|
||||
early_slope_edge=22
|
||||
if abs(angle) > early_slope_edge:
|
||||
if angle < 0:
|
||||
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
|
||||
angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
|
||||
else:
|
||||
angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
|
||||
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
|
||||
angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
|
||||
angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
|
||||
if var2 > var:
|
||||
angle = angle2
|
||||
return angle
|
||||
|
||||
def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map):
|
||||
if logger is None:
|
||||
logger = getLogger(__package__)
|
||||
results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles))
|
||||
if map is None:
|
||||
results = [do_image_rotation.__wrapped__(angle, img=img, sigma_des=sigma_des, logger=logger)
|
||||
for angle in angles]
|
||||
else:
|
||||
with share_ndarray(img) as img_shared:
|
||||
results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=None),
|
||||
angles))
|
||||
if plotter:
|
||||
plotter.save_plot_of_rotation_angle(angles, results)
|
||||
try:
|
||||
var_res = np.array(results)
|
||||
assert var_res.any()
|
||||
angle = angles[np.argmax(var_res)]
|
||||
idx = np.argmax(var_res)
|
||||
angle = angles[idx]
|
||||
var = var_res[idx]
|
||||
except:
|
||||
logger.exception("cannot determine best angle among %s", str(angles))
|
||||
angle = 0
|
||||
return angle
|
||||
|
||||
|
||||
def return_deskew_slop_old_mp(img_patch_org, sigma_des,n_tot_angles=100,
|
||||
main_page=False, logger=None, plotter=None):
|
||||
if main_page and plotter:
|
||||
plotter.save_plot_of_textline_density(img_patch_org)
|
||||
|
||||
img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
|
||||
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
|
||||
|
||||
max_shape=np.max(img_int.shape)
|
||||
img_resized=np.zeros((int(max_shape * 1.1) , int(max_shape * 1.1)))
|
||||
|
||||
onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.)
|
||||
onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
|
||||
|
||||
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
|
||||
|
||||
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
|
||||
angles = np.array([-45, 0, 45, 90,])
|
||||
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
|
||||
|
||||
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
|
||||
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
|
||||
elif main_page:
|
||||
angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
|
||||
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
|
||||
|
||||
early_slope_edge=11
|
||||
if abs(angle) > early_slope_edge:
|
||||
if angle < 0:
|
||||
angles = np.linspace(-90, -12, n_tot_angles)
|
||||
else:
|
||||
angles = np.linspace(90, 12, n_tot_angles)
|
||||
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
|
||||
else:
|
||||
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
|
||||
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
|
||||
|
||||
early_slope_edge=22
|
||||
if abs(angle) > early_slope_edge:
|
||||
if angle < 0:
|
||||
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
|
||||
else:
|
||||
angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
|
||||
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
|
||||
|
||||
return angle
|
||||
|
||||
def do_image_rotation_omp(queue_of_all_params,angles_per_process, img_resized, sigma_des):
|
||||
vars_per_each_subprocess = []
|
||||
angles_per_each_subprocess = []
|
||||
for mv in range(len(angles_per_process)):
|
||||
img_rot=rotate_image(img_resized,angles_per_process[mv])
|
||||
img_rot[img_rot!=0]=1
|
||||
try:
|
||||
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
|
||||
except:
|
||||
var_spectrum=0
|
||||
vars_per_each_subprocess.append(var_spectrum)
|
||||
angles_per_each_subprocess.append(angles_per_process[mv])
|
||||
|
||||
queue_of_all_params.put([vars_per_each_subprocess, angles_per_each_subprocess])
|
||||
|
||||
def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None):
|
||||
num_cores = cpu_count()
|
||||
|
||||
queue_of_all_params = Queue()
|
||||
processes = []
|
||||
nh = np.linspace(0, len(angles), num_cores + 1)
|
||||
|
||||
for i in range(num_cores):
|
||||
angles_per_process = angles[int(nh[i]) : int(nh[i + 1])]
|
||||
processes.append(Process(target=do_image_rotation_omp, args=(queue_of_all_params, angles_per_process, img_resized, sigma_des)))
|
||||
|
||||
for i in range(num_cores):
|
||||
processes[i].start()
|
||||
|
||||
var_res=[]
|
||||
all_angles = []
|
||||
for i in range(num_cores):
|
||||
list_all_par = queue_of_all_params.get(True)
|
||||
vars_for_subprocess = list_all_par[0]
|
||||
angles_sub_process = list_all_par[1]
|
||||
for j in range(len(vars_for_subprocess)):
|
||||
var_res.append(vars_for_subprocess[j])
|
||||
all_angles.append(angles_sub_process[j])
|
||||
|
||||
for i in range(num_cores):
|
||||
processes[i].join()
|
||||
|
||||
if plotter:
|
||||
plotter.save_plot_of_rotation_angle(all_angles, var_res)
|
||||
|
||||
|
||||
try:
|
||||
var_res=np.array(var_res)
|
||||
ang_int=all_angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
|
||||
except:
|
||||
ang_int=0
|
||||
return ang_int
|
||||
var = 0
|
||||
return angle, var
|
||||
|
||||
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
|
||||
def do_work_of_slopes_new(
|
||||
box_text, contour, contour_par, index_r_con,
|
||||
textline_mask_tot_ea, image_page_rotated, slope_deskew,
|
||||
box_text, contour, contour_par,
|
||||
textline_mask_tot_ea=None, slope_deskew=0.0,
|
||||
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
|
||||
):
|
||||
if KERNEL is None:
|
||||
|
@ -1639,7 +1603,7 @@ def do_work_of_slopes_new(
|
|||
logger.debug('enter do_work_of_slopes_new')
|
||||
|
||||
x, y, w, h = box_text
|
||||
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
|
||||
crop_coor = box2rect(box_text)
|
||||
mask_textline = np.zeros(textline_mask_tot_ea.shape)
|
||||
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
|
||||
all_text_region_raw = textline_mask_tot_ea * mask_textline
|
||||
|
@ -1647,7 +1611,7 @@ def do_work_of_slopes_new(
|
|||
img_int_p = all_text_region_raw[:,:]
|
||||
img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2)
|
||||
|
||||
if img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
|
||||
if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
|
||||
slope = 0
|
||||
slope_for_all = slope_deskew
|
||||
all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w]
|
||||
|
@ -1683,11 +1647,14 @@ def do_work_of_slopes_new(
|
|||
all_text_region_raw[mask_only_con_region == 0] = 0
|
||||
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text)
|
||||
|
||||
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope
|
||||
return cnt_clean_rot, crop_coor, slope
|
||||
|
||||
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
|
||||
@wrap_ndarray_shared(kw='mask_texts_only')
|
||||
def do_work_of_slopes_new_curved(
|
||||
box_text, contour, contour_par, index_r_con,
|
||||
textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew,
|
||||
box_text, contour_par,
|
||||
textline_mask_tot_ea=None, mask_texts_only=None,
|
||||
num_col=1, scale_par=1.0, slope_deskew=0.0,
|
||||
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
|
||||
):
|
||||
if KERNEL is None:
|
||||
|
@ -1704,7 +1671,7 @@ def do_work_of_slopes_new_curved(
|
|||
# plt.imshow(img_int_p)
|
||||
# plt.show()
|
||||
|
||||
if img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
|
||||
if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
|
||||
slope = 0
|
||||
slope_for_all = slope_deskew
|
||||
else:
|
||||
|
@ -1730,7 +1697,7 @@ def do_work_of_slopes_new_curved(
|
|||
slope_for_all = slope_deskew
|
||||
slope = slope_for_all
|
||||
|
||||
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
|
||||
crop_coor = box2rect(box_text)
|
||||
|
||||
if abs(slope_for_all) < 45:
|
||||
textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
|
||||
|
@ -1763,20 +1730,25 @@ def do_work_of_slopes_new_curved(
|
|||
mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4)
|
||||
|
||||
pixel_img = 1
|
||||
mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par))
|
||||
mask_biggest2 = resize_image(mask_biggest2,
|
||||
int(mask_biggest2.shape[0] * scale_par),
|
||||
int(mask_biggest2.shape[1] * scale_par))
|
||||
cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img)
|
||||
try:
|
||||
textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0])
|
||||
except Exception as why:
|
||||
logger.error(why)
|
||||
else:
|
||||
textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True)
|
||||
textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw,
|
||||
slope_for_all, contour_par,
|
||||
box_text, True)
|
||||
|
||||
return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
|
||||
return textlines_cnt_per_region[::-1], crop_coor, slope
|
||||
|
||||
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
|
||||
def do_work_of_slopes_new_light(
|
||||
box_text, contour, contour_par, index_r_con,
|
||||
textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light,
|
||||
box_text, contour, contour_par,
|
||||
textline_mask_tot_ea=None, slope_deskew=0, textline_light=True,
|
||||
logger=None
|
||||
):
|
||||
if logger is None:
|
||||
|
@ -1784,7 +1756,7 @@ def do_work_of_slopes_new_light(
|
|||
logger.debug('enter do_work_of_slopes_new_light')
|
||||
|
||||
x, y, w, h = box_text
|
||||
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
|
||||
crop_coor = box2rect(box_text)
|
||||
mask_textline = np.zeros(textline_mask_tot_ea.shape)
|
||||
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
|
||||
all_text_region_raw = textline_mask_tot_ea * mask_textline
|
||||
|
@ -1805,4 +1777,4 @@ def do_work_of_slopes_new_light(
|
|||
all_text_region_raw[mask_only_con_region == 0] = 0
|
||||
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text)
|
||||
|
||||
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope_deskew
|
||||
return cnt_clean_rot, crop_coor, slope_deskew
|
||||
|
|
45
src/eynollah/utils/shm.py
Normal file
45
src/eynollah/utils/shm.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
from multiprocessing import shared_memory
|
||||
from contextlib import contextmanager
|
||||
from functools import wraps
|
||||
import numpy as np
|
||||
|
||||
@contextmanager
def share_ndarray(array: np.ndarray):
    """Copy ``array`` into a new shared-memory segment for the ``with`` body.

    Yields a plain dict with the keys ``shape``, ``dtype`` and ``name``
    (the segment name). The dict is what ``ndarray_shared`` expects in order
    to re-attach to the same data.

    The segment is closed and unlinked when the context exits, so the
    yielded descriptor must not be used afterwards.
    """
    # ``nbytes`` is a plain Python int for every shape (including 0-d),
    # whereas ``itemsize * np.prod(shape)`` yields a NumPy scalar and even a
    # float for an empty shape tuple. SharedMemory refuses size 0, so
    # allocate at least one byte to support empty arrays.
    size = max(1, int(array.nbytes))
    shm = shared_memory.SharedMemory(create=True, size=size)
    try:
        shared_array = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
        # Ellipsis assignment also works for 0-d arrays, where ``[:]`` raises.
        shared_array[...] = array
        # Only this local view object is made read-only here.
        shared_array.flags["WRITEABLE"] = False
        yield dict(shape=array.shape, dtype=array.dtype, name=shm.name)
    finally:
        shm.close()
        shm.unlink()
|
||||
|
||||
@contextmanager
def ndarray_shared(array: dict):
    """Attach to an existing shared-memory segment and yield it as an ndarray.

    ``array`` is a descriptor dict with the keys ``name`` (segment name),
    ``shape`` and ``dtype``. The yielded ndarray is backed directly by the
    segment's buffer; the local handle is closed on exit (the segment itself
    is not unlinked here).
    """
    segment = shared_memory.SharedMemory(name=array['name'])
    try:
        shape, dtype = array['shape'], array['dtype']
        view = np.ndarray(shape, dtype=dtype, buffer=segment.buf)
        yield view
    finally:
        segment.close()
|
||||
|
||||
def wrap_ndarray_shared(kw=None):
    """Build a decorator that resolves a shared-array descriptor before calling.

    With ``kw=None`` the decorated function receives the descriptor as its
    first positional argument; otherwise the descriptor is taken from the
    keyword argument named ``kw`` (which must then be passed by keyword).
    In both cases the descriptor is opened via ``ndarray_shared`` and the
    resulting ndarray is handed to the wrapped function in its place.
    """
    def wrapper(f):
        @wraps(f)
        def call_with_positional(array, *args, **kwargs):
            # Descriptor arrives first; swap it for the attached ndarray.
            with ndarray_shared(array) as ndarray:
                return f(ndarray, *args, **kwargs)

        @wraps(f)
        def call_with_keyword(*args, **kwargs):
            # Descriptor arrives under the configured keyword name.
            descriptor = kwargs.pop(kw)
            with ndarray_shared(descriptor) as ndarray:
                kwargs[kw] = ndarray
                return f(*args, **kwargs)

        return call_with_positional if kw is None else call_with_keyword
    return wrapper
|
||||
|
|
@ -1,13 +1,17 @@
|
|||
import math
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
import tensorflow as tf
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
import math
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from Bio import pairwise2
|
||||
|
||||
from .resize import resize_image
|
||||
|
||||
|
||||
def decode_batch_predictions(pred, num_to_char, max_len = 128):
|
||||
# input_len is the product of the batch size and the
|
||||
# number of time steps.
|
||||
|
@ -92,6 +96,7 @@ def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(t
|
|||
return peaks_final
|
||||
else:
|
||||
return None
|
||||
|
||||
# Function to fit text inside the given area
|
||||
def fit_text_single_line(draw, text, font_path, max_width, max_height):
|
||||
initial_font_size = 50
|
||||
|
@ -369,7 +374,13 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind
|
|||
return textline_contour
|
||||
|
||||
|
||||
def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False):
|
||||
def return_rnn_cnn_ocr_of_given_textlines(image,
|
||||
all_found_textline_polygons,
|
||||
all_box_coord,
|
||||
prediction_model,
|
||||
b_s_ocr, num_to_char,
|
||||
textline_light=False,
|
||||
curved_line=False):
|
||||
max_len = 512
|
||||
padding_token = 299
|
||||
image_width = 512#max_len * 4
|
||||
|
@ -425,17 +436,23 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
|
|||
splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
|
||||
|
||||
if splited_images:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0],
|
||||
image_height,
|
||||
image_width)
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(1)
|
||||
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1],
|
||||
image_height,
|
||||
image_width)
|
||||
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(-1)
|
||||
|
||||
else:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop,
|
||||
image_height,
|
||||
image_width)
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(0)
|
||||
|
||||
|
@ -468,7 +485,12 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
|
|||
pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
|
||||
extracted_texts.append(pred_texts_ib)
|
||||
|
||||
extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
|
||||
extracted_texts_merged = [extracted_texts[ind]
|
||||
if cropped_lines_meging_indexing[ind]==0
|
||||
else extracted_texts[ind]+" "+extracted_texts[ind+1]
|
||||
if cropped_lines_meging_indexing[ind]==1
|
||||
else None
|
||||
for ind in range(len(cropped_lines_meging_indexing))]
|
||||
|
||||
extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
|
||||
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
|
||||
|
|
|
@ -57,19 +57,15 @@ def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_margina
|
|||
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
|
||||
region_counter.inc('region')
|
||||
|
||||
for idx_textregion, _ in enumerate(order_of_texts):
|
||||
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(order_of_texts[idx_textregion] + 1)))
|
||||
for idx_textregion in order_of_texts:
|
||||
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(idx_textregion + 1)))
|
||||
region_counter.inc('region')
|
||||
|
||||
for id_marginal in id_of_marginalia_right:
|
||||
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
|
||||
region_counter.inc('region')
|
||||
|
||||
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
|
||||
indexes_sorted = np.array(indexes_sorted)
|
||||
index_of_types = np.array(index_of_types)
|
||||
kind_of_texts = np.array(kind_of_texts)
|
||||
|
||||
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, indexes_sorted, index_of_types, kind_of_texts, ref_point):
|
||||
id_of_texts = []
|
||||
order_of_texts = []
|
||||
|
||||
|
|
|
@ -56,113 +56,30 @@ class EynollahXmlWriter:
|
|||
points_page_print = points_page_print + ' '
|
||||
return points_page_print[:-1]
|
||||
|
||||
def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_all_textlines_textregion):
|
||||
for j in range(len(all_found_textline_polygons_marginals[marginal_idx])):
|
||||
coords = CoordsType()
|
||||
textline = TextLineType(id=counter.next_line_id, Coords=coords)
|
||||
if ocr_all_textlines_textregion:
|
||||
textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] )
|
||||
marginal_region.add_TextLine(textline)
|
||||
marginal_region.set_orientation(-slopes_marginals[marginal_idx])
|
||||
points_co = ''
|
||||
for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])):
|
||||
if not (self.curved_line or self.textline_light):
|
||||
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
|
||||
textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
|
||||
else:
|
||||
textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
|
||||
textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
|
||||
points_co += str(textline_x_coord)
|
||||
points_co += ','
|
||||
points_co += str(textline_y_coord)
|
||||
if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45:
|
||||
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y))
|
||||
else:
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y))
|
||||
|
||||
elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45:
|
||||
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
|
||||
else:
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
|
||||
points_co += ' '
|
||||
coords.set_points(points_co[:-1])
|
||||
|
||||
def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion):
|
||||
self.logger.debug('enter serialize_lines_in_region')
|
||||
for j in range(len(all_found_textline_polygons[region_idx])):
|
||||
for j, polygon_textline in enumerate(all_found_textline_polygons[region_idx]):
|
||||
coords = CoordsType()
|
||||
textline = TextLineType(id=counter.next_line_id, Coords=coords)
|
||||
if ocr_all_textlines_textregion:
|
||||
textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] )
|
||||
# FIXME: add OCR confidence
|
||||
textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])])
|
||||
text_region.add_TextLine(textline)
|
||||
text_region.set_orientation(-slopes[region_idx])
|
||||
region_bboxes = all_box_coord[region_idx]
|
||||
points_co = ''
|
||||
for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]):
|
||||
if not (self.curved_line or self.textline_light):
|
||||
if len(contour_textline) == 2:
|
||||
textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x))
|
||||
textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y))
|
||||
else:
|
||||
textline_x_coord = max(0, int((contour_textline[0][0] + region_bboxes[2] + page_coord[2]) / self.scale_x))
|
||||
textline_y_coord = max(0, int((contour_textline[0][1] + region_bboxes[0] + page_coord[0]) / self.scale_y))
|
||||
points_co += str(textline_x_coord)
|
||||
points_co += ','
|
||||
points_co += str(textline_y_coord)
|
||||
|
||||
if self.textline_light or (self.curved_line and np.abs(slopes[region_idx]) <= 45):
|
||||
if len(contour_textline) == 2:
|
||||
points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y))
|
||||
else:
|
||||
points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y))
|
||||
elif self.curved_line and np.abs(slopes[region_idx]) > 45:
|
||||
if len(contour_textline)==2:
|
||||
points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((contour_textline[1] + region_bboxes[0] + page_coord[0])/self.scale_y))
|
||||
else:
|
||||
points_co += str(int((contour_textline[0][0] + region_bboxes[2]+page_coord[2])/self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y))
|
||||
points_co += ' '
|
||||
coords.set_points(points_co[:-1])
|
||||
|
||||
def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion):
|
||||
self.logger.debug('enter serialize_lines_in_region')
|
||||
for j in range(1):
|
||||
coords = CoordsType()
|
||||
textline = TextLineType(id=counter.next_line_id, Coords=coords)
|
||||
if ocr_all_textlines_textregion:
|
||||
textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] )
|
||||
text_region.add_TextLine(textline)
|
||||
#region_bboxes = all_box_coord[region_idx]
|
||||
points_co = ''
|
||||
for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[j]):
|
||||
if len(contour_textline) == 2:
|
||||
points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y))
|
||||
else:
|
||||
points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y))
|
||||
|
||||
points_co += ' '
|
||||
for point in polygon_textline:
|
||||
if len(point) != 2:
|
||||
point = point[0]
|
||||
point_x = point[0] + page_coord[2]
|
||||
point_y = point[1] + page_coord[0]
|
||||
# FIXME: or actually... not self.textline_light and not self.curved_line or np.abs(slopes[region_idx]) > 45?
|
||||
if not self.textline_light and not (self.curved_line and np.abs(slopes[region_idx]) <= 45):
|
||||
point_x += region_bboxes[2]
|
||||
point_y += region_bboxes[0]
|
||||
point_x = max(0, int(point_x / self.scale_x))
|
||||
point_y = max(0, int(point_y / self.scale_y))
|
||||
points_co += str(point_x) + ',' + str(point_y) + ' '
|
||||
coords.set_points(points_co[:-1])
|
||||
|
||||
def write_pagexml(self, pcgts):
|
||||
|
@ -170,8 +87,50 @@ class EynollahXmlWriter:
|
|||
with open(self.output_filename, 'w') as f:
|
||||
f.write(to_xml(pcgts))
|
||||
|
||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, conf_contours_textregion=None, skip_layout_reading_order=False):
|
||||
self.logger.debug('enter build_pagexml_no_full_layout')
|
||||
def build_pagexml_no_full_layout(
|
||||
self, found_polygons_text_region,
|
||||
page_coord, order_of_texts, id_of_texts,
|
||||
all_found_textline_polygons,
|
||||
all_box_coord,
|
||||
found_polygons_text_region_img,
|
||||
found_polygons_marginals_left, found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left, all_box_coord_marginals_right,
|
||||
slopes, slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_seplines,
|
||||
found_polygons_tables,
|
||||
**kwargs):
|
||||
return self.build_pagexml_full_layout(
|
||||
found_polygons_text_region, [],
|
||||
page_coord, order_of_texts, id_of_texts,
|
||||
all_found_textline_polygons, [],
|
||||
all_box_coord, [],
|
||||
found_polygons_text_region_img, found_polygons_tables, [],
|
||||
found_polygons_marginals_left, found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left, all_box_coord_marginals_right,
|
||||
slopes, [], slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_seplines,
|
||||
**kwargs)
|
||||
|
||||
def build_pagexml_full_layout(
|
||||
self,
|
||||
found_polygons_text_region, found_polygons_text_region_h,
|
||||
page_coord, order_of_texts, id_of_texts,
|
||||
all_found_textline_polygons, all_found_textline_polygons_h,
|
||||
all_box_coord, all_box_coord_h,
|
||||
found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals,
|
||||
found_polygons_marginals_left,found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left, all_box_coord_marginals_right,
|
||||
slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_seplines,
|
||||
ocr_all_textlines=None, ocr_all_textlines_h=None,
|
||||
ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None,
|
||||
ocr_all_textlines_drop=None,
|
||||
conf_contours_textregions=None, conf_contours_textregions_h=None,
|
||||
skip_layout_reading_order=False):
|
||||
self.logger.debug('enter build_pagexml')
|
||||
|
||||
# create the file structure
|
||||
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
|
||||
|
@ -179,191 +138,116 @@ class EynollahXmlWriter:
|
|||
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
||||
|
||||
counter = EynollahIdCounter()
|
||||
if len(found_polygons_text_region) > 0:
|
||||
if len(order_of_texts):
|
||||
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
|
||||
id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left]
|
||||
id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right]
|
||||
id_of_marginalia_left = [_counter_marginals.next_region_id
|
||||
for _ in found_polygons_marginals_left]
|
||||
id_of_marginalia_right = [_counter_marginals.next_region_id
|
||||
for _ in found_polygons_marginals_right]
|
||||
xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right)
|
||||
|
||||
for mm in range(len(found_polygons_text_region)):
|
||||
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]),
|
||||
)
|
||||
#textregion.set_conf(conf_contours_textregion[mm])
|
||||
for mm, region_contour in enumerate(found_polygons_text_region):
|
||||
textregion = TextRegionType(
|
||||
id=counter.next_region_id, type_='paragraph',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord,
|
||||
skip_layout_reading_order))
|
||||
)
|
||||
if conf_contours_textregions:
|
||||
textregion.Coords.set_conf(conf_contours_textregions[mm])
|
||||
page.add_TextRegion(textregion)
|
||||
if ocr_all_textlines:
|
||||
ocr_textlines = ocr_all_textlines[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_marginals_left)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord)))
|
||||
page.add_TextRegion(marginal)
|
||||
if ocr_all_textlines_marginals_left:
|
||||
ocr_textlines = ocr_all_textlines_marginals_left[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
|
||||
#print(ocr_textlines, mm, len(all_found_textline_polygons_marginals_left[mm]) )
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_marginals_right)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord)))
|
||||
page.add_TextRegion(marginal)
|
||||
if ocr_all_textlines_marginals_right:
|
||||
ocr_textlines = ocr_all_textlines_marginals_right[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_text_region_img)):
|
||||
img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType())
|
||||
page.add_ImageRegion(img_region)
|
||||
points_co = ''
|
||||
for lmm in range(len(found_polygons_text_region_img[mm])):
|
||||
try:
|
||||
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
|
||||
points_co += ' '
|
||||
except:
|
||||
|
||||
points_co += str(int((found_polygons_text_region_img[mm][lmm][0] + page_coord[2])/ self.scale_x ))
|
||||
points_co += ','
|
||||
points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0])/ self.scale_y ))
|
||||
points_co += ' '
|
||||
|
||||
img_region.get_Coords().set_points(points_co[:-1])
|
||||
|
||||
for mm in range(len(polygons_lines_to_be_written_in_xml)):
|
||||
sep_hor = SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType())
|
||||
page.add_SeparatorRegion(sep_hor)
|
||||
points_co = ''
|
||||
for lmm in range(len(polygons_lines_to_be_written_in_xml[mm])):
|
||||
points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,0] ) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,1] ) / self.scale_y))
|
||||
points_co += ' '
|
||||
sep_hor.get_Coords().set_points(points_co[:-1])
|
||||
for mm in range(len(found_polygons_tables)):
|
||||
tab_region = TableRegionType(id=counter.next_region_id, Coords=CoordsType())
|
||||
page.add_TableRegion(tab_region)
|
||||
points_co = ''
|
||||
for lmm in range(len(found_polygons_tables[mm])):
|
||||
points_co += str(int((found_polygons_tables[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((found_polygons_tables[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
|
||||
points_co += ' '
|
||||
tab_region.get_Coords().set_points(points_co[:-1])
|
||||
|
||||
return pcgts
|
||||
|
||||
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals_left,found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None):
|
||||
self.logger.debug('enter build_pagexml_full_layout')
|
||||
|
||||
# create the file structure
|
||||
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
|
||||
page = pcgts.get_Page()
|
||||
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
||||
|
||||
counter = EynollahIdCounter()
|
||||
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
|
||||
id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left]
|
||||
id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right]
|
||||
xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right)
|
||||
|
||||
for mm in range(len(found_polygons_text_region)):
|
||||
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]))
|
||||
page.add_TextRegion(textregion)
|
||||
|
||||
if ocr_all_textlines:
|
||||
ocr_textlines = ocr_all_textlines[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines)
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord,
|
||||
all_box_coord, slopes, counter, ocr_textlines)
|
||||
|
||||
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
|
||||
for mm in range(len(found_polygons_text_region_h)):
|
||||
textregion = TextRegionType(id=counter.next_region_id, type_='header',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
|
||||
for mm, region_contour in enumerate(found_polygons_text_region_h):
|
||||
textregion = TextRegionType(
|
||||
id=counter.next_region_id, type_='heading',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
if conf_contours_textregions_h:
|
||||
textregion.Coords.set_conf(conf_contours_textregions_h[mm])
|
||||
page.add_TextRegion(textregion)
|
||||
|
||||
if ocr_all_textlines_h:
|
||||
ocr_textlines = ocr_all_textlines_h[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines)
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord,
|
||||
all_box_coord_h, slopes_h, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_marginals_left)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord)))
|
||||
for mm, region_contour in enumerate(found_polygons_marginals_left):
|
||||
marginal = TextRegionType(
|
||||
id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
page.add_TextRegion(marginal)
|
||||
if ocr_all_textlines_marginals_left:
|
||||
ocr_textlines = ocr_all_textlines_marginals_left[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
|
||||
self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_marginals_right)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord)))
|
||||
for mm, region_contour in enumerate(found_polygons_marginals_right):
|
||||
marginal = TextRegionType(
|
||||
id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
page.add_TextRegion(marginal)
|
||||
if ocr_all_textlines_marginals_right:
|
||||
ocr_textlines = ocr_all_textlines_marginals_right[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
|
||||
self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_right, mm, page_coord,
|
||||
all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_drop_capitals)):
|
||||
dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))
|
||||
for mm, region_contour in enumerate(found_polygons_drop_capitals):
|
||||
dropcapital = TextRegionType(
|
||||
id=counter.next_region_id, type_='drop-capital',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
page.add_TextRegion(dropcapital)
|
||||
all_box_coord_drop = None
|
||||
slopes_drop = None
|
||||
all_box_coord_drop = [[0, 0, 0, 0]]
|
||||
slopes_drop = [0]
|
||||
if ocr_all_textlines_drop:
|
||||
ocr_textlines = ocr_all_textlines_drop[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=ocr_textlines)
|
||||
self.serialize_lines_in_region(dropcapital, [[found_polygons_drop_capitals[mm]]], 0, page_coord,
|
||||
all_box_coord_drop, slopes_drop, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_text_region_img)):
|
||||
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
|
||||
for region_contour in found_polygons_text_region_img:
|
||||
page.add_ImageRegion(
|
||||
ImageRegionType(id=counter.next_region_id,
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))))
|
||||
|
||||
for mm in range(len(polygons_lines_to_be_written_in_xml)):
|
||||
page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
|
||||
for region_contour in polygons_seplines:
|
||||
page.add_SeparatorRegion(
|
||||
SeparatorRegionType(id=counter.next_region_id,
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, [0, 0, 0, 0]))))
|
||||
|
||||
for mm in range(len(found_polygons_tables)):
|
||||
page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))
|
||||
for region_contour in found_polygons_tables:
|
||||
page.add_TableRegion(
|
||||
TableRegionType(id=counter.next_region_id,
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))))
|
||||
|
||||
return pcgts
|
||||
|
||||
def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
    """Serialize a contour as a space-separated list of ``x,y`` pairs.

    Args:
        contour: iterable of points. A point is either a 2-element
            sequence ``(x, y)`` or a wrapper whose first element is such
            a pair (any point whose ``len()`` is not 2 is unwrapped once).
        page_coord: indexable offsets; ``page_coord[2]`` is added to x and
            ``page_coord[0]`` is added to y before scaling. Presumably the
            crop box of the page within the full image - confirm against
            the callers. Not accessed when ``skip_layout_reading_order``
            is true.
        skip_layout_reading_order: when true, no offset is applied and the
            points are only scaled.

    Returns:
        A string like ``"x1,y1 x2,y2"`` (empty for an empty contour), with
        each value divided by ``self.scale_x`` / ``self.scale_y`` and
        truncated toward zero by ``int()``.
    """
    self.logger.debug('enter calculate_polygon_coords')
    apply_offset = not skip_layout_reading_order
    pairs = []
    for point in contour:
        if len(point) != 2:
            # nested form: the actual (x, y) pair is the first element
            point = point[0]
        point_x = point[0]
        point_y = point[1]
        if apply_offset:
            point_x = point_x + page_coord[2]
            point_y = point_y + page_coord[0]
        point_x = int(point_x / self.scale_x)
        point_y = int(point_y / self.scale_y)
        pairs.append(str(point_x) + ',' + str(point_y))
    # joining avoids both the trailing separator and repeated string concatenation
    return ' '.join(pairs)
|
||||
|
||||
|
|
|
@ -17,18 +17,35 @@ from ocrd_models.constants import NAMESPACES as NS
|
|||
testdir = Path(__file__).parent.resolve()
|
||||
|
||||
MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve()))
|
||||
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))
|
||||
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_1').resolve()))
|
||||
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
|
||||
|
||||
def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
|
||||
@pytest.mark.parametrize(
|
||||
"options",
|
||||
[
|
||||
[], # defaults
|
||||
#["--allow_scaling", "--curved-line"],
|
||||
["--allow_scaling", "--curved-line", "--full-layout"],
|
||||
["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"],
|
||||
["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based",
|
||||
"--textline_light", "--light_version"],
|
||||
# -ep ...
|
||||
# -eoi ...
|
||||
# FIXME: find out whether OCR extra was installed, otherwise skip these
|
||||
["--do_ocr"],
|
||||
["--do_ocr", "--light_version", "--textline_light"],
|
||||
["--do_ocr", "--transformer_ocr"],
|
||||
#["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light"],
|
||||
["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light", "--full-layout"],
|
||||
# --skip_layout_and_reading_order
|
||||
], ids=str)
|
||||
def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
|
||||
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
||||
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
|
||||
args = [
|
||||
'-m', MODELS_LAYOUT,
|
||||
'-i', str(infile),
|
||||
'-o', str(outfile.parent),
|
||||
# subtests write to same location
|
||||
'--overwrite',
|
||||
]
|
||||
if pytestconfig.getoption('verbose') > 0:
|
||||
args.extend(['-l', 'DEBUG'])
|
||||
|
@ -36,33 +53,57 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
|
|||
def only_eynollah(logrec):
|
||||
return logrec.name == 'eynollah'
|
||||
runner = CliRunner()
|
||||
for options in [
|
||||
[], # defaults
|
||||
["--allow_scaling", "--curved-line"],
|
||||
["--allow_scaling", "--curved-line", "--full-layout"],
|
||||
["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"],
|
||||
["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based",
|
||||
"--textline_light", "--light_version"],
|
||||
# -ep ...
|
||||
# -eoi ...
|
||||
# --do_ocr
|
||||
# --skip_layout_and_reading_order
|
||||
]:
|
||||
with subtests.test(#msg="test CLI",
|
||||
options=options):
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert str(infile) in logmsgs
|
||||
assert outfile.exists()
|
||||
tree = page_from_file(str(outfile)).etree
|
||||
regions = tree.xpath("//page:TextRegion", namespaces=NS)
|
||||
assert len(regions) >= 2, "result is inaccurate"
|
||||
regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
|
||||
assert len(regions) >= 2, "result is inaccurate"
|
||||
lines = tree.xpath("//page:TextLine", namespaces=NS)
|
||||
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert str(infile) in logmsgs
|
||||
assert outfile.exists()
|
||||
tree = page_from_file(str(outfile)).etree
|
||||
regions = tree.xpath("//page:TextRegion", namespaces=NS)
|
||||
assert len(regions) >= 2, "result is inaccurate"
|
||||
regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
|
||||
assert len(regions) >= 2, "result is inaccurate"
|
||||
lines = tree.xpath("//page:TextLine", namespaces=NS)
|
||||
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
|
||||
|
||||
@pytest.mark.parametrize(
    "options",
    [
        ["--tables"],
        ["--tables", "--full-layout"],
        ["--tables", "--full-layout", "--textline_light", "--light_version"],
    ], ids=str)
def test_run_eynollah_layout_filename2(tmp_path, pytestconfig, caplog, options):
    """Run the layout CLI with table detection on a single sample page.

    Each parametrized ``options`` list is appended to the base arguments
    (model directory, input file, output directory). The test then checks
    the exit code, that the input path was logged by the ``eynollah``
    logger, and that the resulting PAGE-XML contains a minimum number of
    text, table and separator regions as well as text lines.
    """
    infile = testdir.joinpath('resources/euler_rechenkunst01_1738_0025.tif')
    outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml'
    cli_args = ['-m', MODELS_LAYOUT, '-i', str(infile), '-o', str(outfile.parent)]
    if pytestconfig.getoption('verbose') > 0:
        cli_args += ['-l', 'DEBUG']
    caplog.set_level(logging.INFO)

    def from_eynollah_logger(record):
        # keep only records emitted by the 'eynollah' logger
        return record.name == 'eynollah'

    runner = CliRunner()
    with caplog.filtering(from_eynollah_logger):
        result = runner.invoke(layout_cli, cli_args + options, catch_exceptions=False)
    assert result.exit_code == 0, result.stdout

    messages = [record.message for record in caplog.records]
    assert str(infile) in messages
    assert outfile.exists()

    tree = page_from_file(str(outfile)).etree
    text_regions = tree.xpath("//page:TextRegion", namespaces=NS)
    assert len(text_regions) >= 2, "result is inaccurate"
    # model/decoding is not very precise, so (depending on mode) we can get fractures/splits/FP
    table_regions = tree.xpath("//page:TableRegion", namespaces=NS)
    assert len(table_regions) >= 1, "result is inaccurate"
    separator_regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
    assert len(separator_regions) >= 2, "result is inaccurate"
    # mostly table (if detected correctly), but 1 page and 1 catch-word line
    text_lines = tree.xpath("//page:TextLine", namespaces=NS)
    assert len(text_lines) >= 2, "result is inaccurate"
|
||||
|
||||
def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
|
||||
indir = testdir.joinpath('resources')
|
||||
|
@ -86,7 +127,13 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
|
|||
assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in'))
|
||||
assert len(list(outdir.iterdir())) == 2
|
||||
|
||||
def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog):
|
||||
@pytest.mark.parametrize(
|
||||
"options",
|
||||
[
|
||||
[], # defaults
|
||||
["--no-patches"],
|
||||
], ids=str)
|
||||
def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options):
|
||||
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
|
||||
args = [
|
||||
|
@ -100,25 +147,19 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
|
|||
def only_eynollah(logrec):
|
||||
return logrec.name == 'SbbBinarizer'
|
||||
runner = CliRunner()
|
||||
for options in [
|
||||
[], # defaults
|
||||
["--no-patches"],
|
||||
]:
|
||||
with subtests.test(#msg="test CLI",
|
||||
options=options):
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
|
||||
assert outfile.exists()
|
||||
with Image.open(infile) as original_img:
|
||||
original_size = original_img.size
|
||||
with Image.open(outfile) as binarized_img:
|
||||
binarized_size = binarized_img.size
|
||||
assert original_size == binarized_size
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
|
||||
assert outfile.exists()
|
||||
with Image.open(infile) as original_img:
|
||||
original_size = original_img.size
|
||||
with Image.open(outfile) as binarized_img:
|
||||
binarized_size = binarized_img.size
|
||||
assert original_size == binarized_size
|
||||
|
||||
def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog):
|
||||
def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog):
|
||||
indir = testdir.joinpath('resources')
|
||||
outdir = tmp_path
|
||||
args = [
|
||||
|
@ -139,15 +180,19 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c
|
|||
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2
|
||||
assert len(list(outdir.iterdir())) == 2
|
||||
|
||||
def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog):
|
||||
@pytest.mark.parametrize(
|
||||
"options",
|
||||
[
|
||||
[], # defaults
|
||||
["-sos"],
|
||||
], ids=str)
|
||||
def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options):
|
||||
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
|
||||
args = [
|
||||
'-m', MODELS_LAYOUT,
|
||||
'-i', str(infile),
|
||||
'-o', str(outfile.parent),
|
||||
# subtests write to same location
|
||||
'--overwrite',
|
||||
]
|
||||
if pytestconfig.getoption('verbose') > 0:
|
||||
args.extend(['-l', 'DEBUG'])
|
||||
|
@ -155,25 +200,19 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap
|
|||
def only_eynollah(logrec):
|
||||
return logrec.name == 'enhancement'
|
||||
runner = CliRunner()
|
||||
for options in [
|
||||
[], # defaults
|
||||
["-sos"],
|
||||
]:
|
||||
with subtests.test(#msg="test CLI",
|
||||
options=options):
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
|
||||
assert outfile.exists()
|
||||
with Image.open(infile) as original_img:
|
||||
original_size = original_img.size
|
||||
with Image.open(outfile) as enhanced_img:
|
||||
enhanced_size = enhanced_img.size
|
||||
assert (original_size == enhanced_size) == ("-sos" in options)
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
|
||||
assert outfile.exists()
|
||||
with Image.open(infile) as original_img:
|
||||
original_size = original_img.size
|
||||
with Image.open(outfile) as enhanced_img:
|
||||
enhanced_size = enhanced_img.size
|
||||
assert (original_size == enhanced_size) == ("-sos" in options)
|
||||
|
||||
def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
|
||||
def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog):
|
||||
indir = testdir.joinpath('resources')
|
||||
outdir = tmp_path
|
||||
args = [
|
||||
|
@ -194,7 +233,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, ca
|
|||
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2
|
||||
assert len(list(outdir.iterdir())) == 2
|
||||
|
||||
def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
|
||||
def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog):
|
||||
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
|
||||
args = [
|
||||
|
@ -223,7 +262,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplo
|
|||
#assert in_order != out_order
|
||||
assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']
|
||||
|
||||
def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
|
||||
def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog):
|
||||
indir = testdir.joinpath('resources')
|
||||
outdir = tmp_path
|
||||
args = [
|
||||
|
@ -245,7 +284,15 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl
|
|||
#assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
|
||||
assert len(list(outdir.iterdir())) == 2
|
||||
|
||||
def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
|
||||
@pytest.mark.parametrize(
|
||||
"options",
|
||||
[
|
||||
[], # defaults
|
||||
["-doit", #str(outrenderfile.parent)],
|
||||
],
|
||||
["-trocr"],
|
||||
], ids=str)
|
||||
def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options):
|
||||
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
|
||||
outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
|
||||
|
@ -255,8 +302,6 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
|
|||
'-i', str(infile),
|
||||
'-dx', str(infile.parent),
|
||||
'-o', str(outfile.parent),
|
||||
# subtests write to same location
|
||||
'--overwrite',
|
||||
]
|
||||
if pytestconfig.getoption('verbose') > 0:
|
||||
args.extend(['-l', 'DEBUG'])
|
||||
|
@ -264,33 +309,25 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
|
|||
def only_eynollah(logrec):
|
||||
return logrec.name == 'eynollah'
|
||||
runner = CliRunner()
|
||||
for options in [
|
||||
# kba Fri Sep 26 12:53:49 CEST 2025
|
||||
# Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
|
||||
# [], # defaults
|
||||
# ["-doit", str(outrenderfile.parent)],
|
||||
["-trocr"],
|
||||
]:
|
||||
with subtests.test(#msg="test CLI",
|
||||
options=options):
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
# FIXME: ocr has no logging!
|
||||
#assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
|
||||
assert outfile.exists()
|
||||
if "-doit" in options:
|
||||
assert outrenderfile.exists()
|
||||
#in_tree = page_from_file(str(infile)).etree
|
||||
#in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
|
||||
out_tree = page_from_file(str(outfile)).etree
|
||||
out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
|
||||
assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
|
||||
assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
|
||||
if "-doit" in options:
|
||||
options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
# FIXME: ocr has no logging!
|
||||
#assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
|
||||
assert outfile.exists()
|
||||
if "-doit" in options:
|
||||
assert outrenderfile.exists()
|
||||
#in_tree = page_from_file(str(infile)).etree
|
||||
#in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
|
||||
out_tree = page_from_file(str(outfile)).etree
|
||||
out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
|
||||
assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
|
||||
assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
|
||||
|
||||
@pytest.mark.skip("Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged")
|
||||
def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
|
||||
def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog):
|
||||
indir = testdir.joinpath('resources')
|
||||
outdir = tmp_path
|
||||
args = [
|
||||
|
|
|
@ -2,6 +2,5 @@ def test_utils_import():
|
|||
import eynollah.utils
|
||||
import eynollah.utils.contour
|
||||
import eynollah.utils.drop_capitals
|
||||
import eynollah.utils.drop_capitals
|
||||
import eynollah.utils.is_nan
|
||||
import eynollah.utils.rotate
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue