Merge pull request #4 from bertsky/loky-with-shm-for-175-rebuilt-refactored

refactoring for 192: speedup and improvements
This commit is contained in:
Robert Sachunsky 2025-10-09 22:18:53 +02:00 committed by GitHub
commit d96af425a7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 1027 additions and 1709 deletions

View file

@ -65,7 +65,12 @@ jobs:
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
make install-dev EXTRAS=OCR,plotting make install-dev EXTRAS=OCR,plotting
make deps-test make deps-test EXTRAS=OCR,plotting
ls -l models_*
- name: Lint with ruff
uses: astral-sh/ruff-action@v3
with:
src: "./src"
- name: Test with pytest - name: Test with pytest
run: make coverage PYTEST_ARGS="-vv --junitxml=pytest.xml" run: make coverage PYTEST_ARGS="-vv --junitxml=pytest.xml"
- name: Get coverage results - name: Get coverage results

View file

@ -15,11 +15,17 @@ Fixed:
* `get_smallest_skew`: after shifting search range of rotation angle, use overall best result * `get_smallest_skew`: after shifting search range of rotation angle, use overall best result
* Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR) * Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR)
* OCR: re-instate missing methods and fix `utils_ocr` function calls * OCR: re-instate missing methods and fix `utils_ocr` function calls
* mbreorder/enhancement CLIs: missing imports
* :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`) * :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`)
f458e3e f458e3e
* tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate` * tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate`
(so CUDA memory gets freed between tests if running on GPU) (so CUDA memory gets freed between tests if running on GPU)
Added:
* test coverage for OCR options in `layout`
* test coverage for table detection in `layout`
* CI linting with ruff
Changed: Changed:
* polygons: slightly widen for regions and lines, increase for separators * polygons: slightly widen for regions and lines, increase for separators
@ -28,7 +34,19 @@ Changed:
but use shared memory if necessary, and switch back from `loky` to stdlib, but use shared memory if necessary, and switch back from `loky` to stdlib,
and shutdown in `del()` instead of `atexit` and shutdown in `del()` instead of `atexit`
* :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too * :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too
* OCR: allow running `-tr` without `-fl`, too
* :fire: writer: use `@type='heading'` instead of `'header'` for headings * :fire: writer: use `@type='heading'` instead of `'header'` for headings
* :fire: performance gains via refactoring (simplification, less copy-code, vectorization,
avoiding unused calculations, avoiding unnecessary 3-channel image operations)
* :fire: heuristic reading order detection: many improvements
- contour vs splitter box matching:
* contour must be contained in box exactly instead of heuristics
* as a fallback, match by center (center must be contained in box)
- original vs deskewed contour matching:
* same min-area filter on both sides
* similar area score in addition to center proximity
* avoid duplicate and missing mappings by allowing N:M
matches and splitting+joining where necessary
* CI: update+improve model caching * CI: update+improve model caching

View file

@ -58,6 +58,9 @@ help:
# Download and extract models to $(PWD)/models_layout_v0_5_0 # Download and extract models to $(PWD)/models_layout_v0_5_0
models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME) models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
# do not download these files if we already have the directories
.INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE)
$(BIN_MODELFILE): $(BIN_MODELFILE):
wget -O $@ $(BIN_MODEL) wget -O $@ $(BIN_MODEL)
$(SEG_MODELFILE): $(SEG_MODELFILE):
@ -90,26 +93,29 @@ deps-test: $(OCR_MODELNAME)
endif endif
deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME) deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
$(PIP) install -r requirements-test.txt $(PIP) install -r requirements-test.txt
ifeq (OCR,$(findstring OCR, $(EXTRAS)))
ln -rs $(OCR_MODELNAME)/* $(SEG_MODELNAME)/
endif
smoke-test: TMPDIR != mktemp -d smoke-test: TMPDIR != mktemp -d
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
# layout analysis: # layout analysis:
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0 eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
# layout, directory mode (skip one, add one): # layout, directory mode (skip one, add one):
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0 eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
# mbreorder, directory mode (overwrite): # mbreorder, directory mode (overwrite):
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0 eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
# binarize: # binarize:
eynollah binarization -m $(CURDIR)/default-2021-03-09 -i $< -o $(TMPDIR)/$(<F) eynollah binarization -m $(CURDIR)/$(BIN_MODELNAME) -i $< -o $(TMPDIR)/$(<F)
test -s $(TMPDIR)/$(<F) test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))" @set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
# enhance: # enhance:
eynollah enhancement -m $(CURDIR)/models_layout_v0_5_0 -sos -i $< -o $(TMPDIR) -O eynollah enhancement -m $(CURDIR)/$(SEG_MODELNAME) -sos -i $< -o $(TMPDIR) -O
test -s $(TMPDIR)/$(<F) test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))" @set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
$(RM) -r $(TMPDIR) $(RM) -r $(TMPDIR)
@ -120,12 +126,12 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
cp $< $(TMPDIR) cp $< $(TMPDIR)
ocrd workspace -d $(TMPDIR) init ocrd workspace -d $(TMPDIR) init
ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F) ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F)
ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/models_layout_v0_5_0 ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/$(SEG_MODELNAME)
result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \ result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \ fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/default-2021-03-09 ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/$(BIN_MODELNAME)
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/default-2021-03-09 -P operation_level region ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/$(BIN_MODELNAME) -P operation_level region
$(RM) -r $(TMPDIR) $(RM) -r $(TMPDIR)
# Run unit tests # Run unit tests

View file

@ -51,3 +51,21 @@ where = ["src"]
[tool.coverage.run] [tool.coverage.run]
branch = true branch = true
source = ["eynollah"] source = ["eynollah"]
# Ruff configuration (linting and formatting)
[tool.ruff]
# maximum allowed line length
line-length = 120
[tool.ruff.lint]
# rule codes that are not enforced
ignore = [
# F401: imported but unused
"F401",
# E402: module-level import not at top of file
"E402",
# F841: local variable is assigned to but never used
"F841",
# E722: bare `except`
"E722",
]
[tool.ruff.format]
# keep the quote characters already used in the code
quote-style = "preserve"

File diff suppressed because it is too large Load diff

View file

@ -6,23 +6,23 @@ from logging import Logger
import os import os
import time import time
from typing import Optional from typing import Optional
import atexit
from functools import partial
from pathlib import Path from pathlib import Path
from multiprocessing import cpu_count
import gc import gc
import cv2 import cv2
import numpy as np import numpy as np
from ocrd_utils import getLogger, tf_disable_interactive_logs from ocrd_utils import getLogger, tf_disable_interactive_logs
import tensorflow as tf import tensorflow as tf
from skimage.morphology import skeletonize from skimage.morphology import skeletonize
from tensorflow.keras.models import load_model from tensorflow.keras.models import load_model
from .utils.resize import resize_image from .utils.resize import resize_image
from .utils.pil_cv2 import pil2cv from .utils.pil_cv2 import pil2cv
from .utils import ( from .utils import (
is_image_filename, is_image_filename,
crop_image_inside_box crop_image_inside_box
) )
from .eynollah import PatchEncoder, Patches
DPI_THRESHOLD = 298 DPI_THRESHOLD = 298
KERNEL = np.ones((5, 5), np.uint8) KERNEL = np.ones((5, 5), np.uint8)

View file

@ -6,25 +6,24 @@ from logging import Logger
import os import os
import time import time
from typing import Optional from typing import Optional
import atexit
from functools import partial
from pathlib import Path from pathlib import Path
from multiprocessing import cpu_count
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import cv2 import cv2
import numpy as np import numpy as np
from ocrd_utils import getLogger from ocrd_utils import getLogger
import statistics import statistics
import tensorflow as tf import tensorflow as tf
from tensorflow.keras.models import load_model from tensorflow.keras.models import load_model
from .utils.resize import resize_image
from .utils.resize import resize_image
from .utils.contour import ( from .utils.contour import (
find_new_features_of_contours, find_new_features_of_contours,
return_contours_of_image, return_contours_of_image,
return_parent_contours, return_parent_contours,
) )
from .utils import is_xml_filename from .utils import is_xml_filename
from .eynollah import PatchEncoder, Patches
DPI_THRESHOLD = 298 DPI_THRESHOLD = 298
KERNEL = np.ones((5, 5), np.uint8) KERNEL = np.ones((5, 5), np.uint8)

View file

@ -15,10 +15,21 @@ from scipy.ndimage import gaussian_filter1d
from .is_nan import isNaN from .is_nan import isNaN
from .contour import (contours_in_same_horizon, from .contour import (contours_in_same_horizon,
find_center_of_contours,
find_new_features_of_contours, find_new_features_of_contours,
return_contours_of_image, return_contours_of_image,
return_parent_contours) return_parent_contours)
def pairwise(iterable):
    """Lazily yield each pair of adjacent items from ``iterable``.

    pairwise('ABCDEFG') → AB BC CD DE EF FG

    Nothing is yielded when ``iterable`` has fewer than two items.
    """
    items = iter(iterable)
    try:
        previous = next(items)
    except StopIteration:
        # empty input: there are no pairs to produce
        return
    for current in items:
        yield previous, current
        # the right-hand item becomes the left-hand item of the next pair
        previous = current
def return_x_start_end_mothers_childs_and_type_of_reading_order( def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff):
@ -785,7 +796,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
return len(peaks_fin_true), peaks_fin_true return len(peaks_fin_true), peaks_fin_true
def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8):
regions_without_separators_0 = regions_without_separators[:, :, 0].sum(axis=0) regions_without_separators_0 = regions_without_separators.sum(axis=0)
##plt.plot(regions_without_separators_0) ##plt.plot(regions_without_separators_0)
##plt.show() ##plt.show()
@ -812,7 +823,10 @@ def return_regions_without_separators(regions_pre):
return regions_without_separators return regions_without_separators
def put_drop_out_from_only_drop_model(layout_no_patch, layout1): def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
drop_only = (layout_no_patch[:, :, 0] == 4) * 1 if layout_no_patch.ndim == 3:
layout_no_patch = layout_no_patch[:, :, 0]
drop_only = (layout_no_patch[:, :] == 4) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop, hir_on_drop = return_contours_of_image(drop_only)
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
@ -838,9 +852,8 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
(map_of_drop_contour_bb == 5).sum()) >= 15: (map_of_drop_contour_bb == 5).sum()) >= 15:
contours_drop_parent_final.append(contours_drop_parent[jj]) contours_drop_parent_final.append(contours_drop_parent[jj])
layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0 layout_no_patch[:, :][layout_no_patch[:, :] == 4] = 0
layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=4)
layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4))
return layout_no_patch return layout_no_patch
@ -914,29 +927,28 @@ def check_any_text_region_in_model_one_is_main_or_header(
contours_only_text_parent_main_d=[] contours_only_text_parent_main_d=[]
contours_only_text_parent_head_d=[] contours_only_text_parent_head_d=[]
for ii in range(len(contours_only_text_parent)): for ii, con in enumerate(contours_only_text_parent):
con=contours_only_text_parent[ii] img = np.zeros(regions_model_1.shape[:2])
img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) img = cv2.fillPoly(img, pts=[con], color=255)
img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))
all_pixels=((img[:,:,0]==255)*1).sum() all_pixels=((img == 255)*1).sum()
pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() pixels_header=( ( (img == 255) & (regions_model_full[:,:,0]==2) )*1 ).sum()
pixels_main=all_pixels-pixels_header pixels_main=all_pixels-pixels_header
if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ):
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ]=2
contours_only_text_parent_head.append(con) contours_only_text_parent_head.append(con)
if contours_only_text_parent_d_ordered is not None: if len(contours_only_text_parent_d_ordered):
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
all_box_coord_head.append(all_box_coord[ii]) all_box_coord_head.append(all_box_coord[ii])
slopes_head.append(slopes[ii]) slopes_head.append(slopes[ii])
all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
conf_contours_head.append(None) conf_contours_head.append(None)
else: else:
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ]=1
contours_only_text_parent_main.append(con) contours_only_text_parent_main.append(con)
conf_contours_main.append(conf_contours[ii]) conf_contours_main.append(conf_contours[ii])
if contours_only_text_parent_d_ordered is not None: if len(contours_only_text_parent_d_ordered):
contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
all_box_coord_main.append(all_box_coord[ii]) all_box_coord_main.append(all_box_coord[ii])
slopes_main.append(slopes[ii]) slopes_main.append(slopes[ii])
@ -1004,11 +1016,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
contours_only_text_parent_head_d=[] contours_only_text_parent_head_d=[]
for ii, con in enumerate(contours_only_text_parent_z): for ii, con in enumerate(contours_only_text_parent_z):
img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = np.zeros(regions_model_1.shape[:2])
img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) img = cv2.fillPoly(img, pts=[con], color=255)
all_pixels = (img[:,:,0]==255).sum() all_pixels = (img == 255).sum()
pixels_header=((img[:,:,0]==255) & pixels_header=((img == 255) &
(regions_model_full[:,:,0]==2)).sum() (regions_model_full[:,:,0]==2)).sum()
pixels_main = all_pixels - pixels_header pixels_main = all_pixels - pixels_header
@ -1018,20 +1030,20 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
( pixels_header / float(pixels_main) >= 0.3 and ( pixels_header / float(pixels_main) >= 0.3 and
length_con[ii] / float(height_con[ii]) >=3 )): length_con[ii] / float(height_con[ii]) >=3 )):
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 2 regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 2
contours_only_text_parent_head.append(contours_only_text_parent[ii]) contours_only_text_parent_head.append(contours_only_text_parent[ii])
conf_contours_head.append(None) # why not conf_contours[ii], too? conf_contours_head.append(None) # why not conf_contours[ii], too?
if contours_only_text_parent_d_ordered is not None: if len(contours_only_text_parent_d_ordered):
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
all_box_coord_head.append(all_box_coord[ii]) all_box_coord_head.append(all_box_coord[ii])
slopes_head.append(slopes[ii]) slopes_head.append(slopes[ii])
all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
else: else:
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 1 regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 1
contours_only_text_parent_main.append(contours_only_text_parent[ii]) contours_only_text_parent_main.append(contours_only_text_parent[ii])
conf_contours_main.append(conf_contours[ii]) conf_contours_main.append(conf_contours[ii])
if contours_only_text_parent_d_ordered is not None: if len(contours_only_text_parent_d_ordered):
contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
all_box_coord_main.append(all_box_coord[ii]) all_box_coord_main.append(all_box_coord[ii])
slopes_main.append(slopes[ii]) slopes_main.append(slopes[ii])
@ -1108,11 +1120,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
textlines_big.append(textlines_tot[i]) textlines_big.append(textlines_tot[i])
textlines_big_org_form.append(textlines_tot_org_form[i]) textlines_big_org_form.append(textlines_tot_org_form[i])
img_textline_s = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) img_textline_s = np.zeros(textline_iamge.shape[:2])
img_textline_s = cv2.fillPoly(img_textline_s, pts=textlines_small, color=(1, 1, 1)) img_textline_s = cv2.fillPoly(img_textline_s, pts=textlines_small, color=1)
img_textline_b = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) img_textline_b = np.zeros(textline_iamge.shape[:2])
img_textline_b = cv2.fillPoly(img_textline_b, pts=textlines_big, color=(1, 1, 1)) img_textline_b = cv2.fillPoly(img_textline_b, pts=textlines_big, color=1)
sum_small_big_all = img_textline_s + img_textline_b sum_small_big_all = img_textline_s + img_textline_b
sum_small_big_all2 = (sum_small_big_all[:, :] == 2) * 1 sum_small_big_all2 = (sum_small_big_all[:, :] == 2) * 1
@ -1124,11 +1136,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
# print(len(textlines_small),'small') # print(len(textlines_small),'small')
intersections = [] intersections = []
for z2 in range(len(textlines_big)): for z2 in range(len(textlines_big)):
img_text = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) img_text = np.zeros(textline_iamge.shape[:2])
img_text = cv2.fillPoly(img_text, pts=[textlines_small[z1]], color=(1, 1, 1)) img_text = cv2.fillPoly(img_text, pts=[textlines_small[z1]], color=1)
img_text2 = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) img_text2 = np.zeros(textline_iamge.shape[:2])
img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z2]], color=(1, 1, 1)) img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z2]], color=1)
sum_small_big = img_text2 + img_text sum_small_big = img_text2 + img_text
sum_small_big_2 = (sum_small_big[:, :] == 2) * 1 sum_small_big_2 = (sum_small_big[:, :] == 2) * 1
@ -1154,19 +1166,17 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
index_small_textlines = list(np.where(np.array(dis_small_from_bigs_tot) == z)[0]) index_small_textlines = list(np.where(np.array(dis_small_from_bigs_tot) == z)[0])
# print(z,index_small_textlines) # print(z,index_small_textlines)
img_text2 = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1], 3)) img_text2 = np.zeros(textline_iamge.shape[:2], dtype=np.uint8)
img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z]], color=(255, 255, 255)) img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z]], color=255)
textlines_big_with_change.append(z) textlines_big_with_change.append(z)
for k in index_small_textlines: for k in index_small_textlines:
img_text2 = cv2.fillPoly(img_text2, pts=[textlines_small[k]], color=(255, 255, 255)) img_text2 = cv2.fillPoly(img_text2, pts=[textlines_small[k]], color=255)
textlines_small_with_change.append(k) textlines_small_with_change.append(k)
img_text2 = img_text2.astype(np.uint8) _, thresh = cv2.threshold(img_text2, 0, 255, 0)
imgray = cv2.cvtColor(img_text2, cv2.COLOR_BGR2GRAY) cont, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
cont, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(cont[0],type(cont)) # print(cont[0],type(cont))
textlines_big_with_change_con.append(cont) textlines_big_with_change_con.append(cont)
@ -1178,111 +1188,51 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
# print(textlines_big_with_change,'textlines_big_with_change') # print(textlines_big_with_change,'textlines_big_with_change')
# print(textlines_small_with_change,'textlines_small_with_change') # print(textlines_small_with_change,'textlines_small_with_change')
# print(textlines_big) # print(textlines_big)
textlines_con_changed.append(textlines_big_org_form)
else:
textlines_con_changed.append(textlines_big_org_form) textlines_con_changed.append(textlines_big_org_form)
return textlines_con_changed return textlines_con_changed
def order_of_regions(textline_mask, contours_main, contours_header, y_ref): def order_of_regions(textline_mask, contours_main, contours_head, y_ref):
##plt.imshow(textline_mask) ##plt.imshow(textline_mask)
##plt.show() ##plt.show()
""" y = textline_mask.sum(axis=1) # horizontal projection profile
print(len(contours_main),'contours_main')
mada_n=textline_mask.sum(axis=1)
y=mada_n[:]
y_help=np.zeros(len(y)+40)
y_help[20:len(y)+20]=y
x=np.arange(len(y))
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
##plt.imshow(textline_mask[:,:])
##plt.show()
sigma_gaus=8
z= gaussian_filter1d(y_help, sigma_gaus)
zneg_rev=-y_help+np.max(y_help)
zneg=np.zeros(len(zneg_rev)+40)
zneg[20:len(zneg_rev)+20]=zneg_rev
zneg= gaussian_filter1d(zneg, sigma_gaus)
peaks, _ = find_peaks(z, height=0)
peaks_neg, _ = find_peaks(zneg, height=0)
peaks_neg=peaks_neg-20-20
peaks=peaks-20
"""
textline_sum_along_width = textline_mask.sum(axis=1)
y = textline_sum_along_width[:]
y_padded = np.zeros(len(y) + 40) y_padded = np.zeros(len(y) + 40)
y_padded[20 : len(y) + 20] = y y_padded[20 : len(y) + 20] = y
x = np.arange(len(y))
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
sigma_gaus = 8 sigma_gaus = 8
z = gaussian_filter1d(y_padded, sigma_gaus) #z = gaussian_filter1d(y_padded, sigma_gaus)
zneg_rev = -y_padded + np.max(y_padded) #peaks, _ = find_peaks(z, height=0)
#peaks = peaks - 20
zneg_rev = np.max(y_padded) - y_padded
zneg = np.zeros(len(zneg_rev) + 40) zneg = np.zeros(len(zneg_rev) + 40)
zneg[20 : len(zneg_rev) + 20] = zneg_rev zneg[20 : len(zneg_rev) + 20] = zneg_rev
zneg = gaussian_filter1d(zneg, sigma_gaus) zneg = gaussian_filter1d(zneg, sigma_gaus)
peaks, _ = find_peaks(z, height=0)
peaks_neg, _ = find_peaks(zneg, height=0) peaks_neg, _ = find_peaks(zneg, height=0)
peaks_neg = peaks_neg - 20 - 20 peaks_neg = peaks_neg - 20 - 20
peaks = peaks - 20
##plt.plot(z) ##plt.plot(z)
##plt.show() ##plt.show()
if contours_main != None: cx_main, cy_main = find_center_of_contours(contours_main)
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) cx_head, cy_head = find_center_of_contours(contours_head)
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) peaks_neg_new = np.append(np.insert(peaks_neg, 0, 0), textline_mask.shape[0])
y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) # offset from bbox of mask
peaks_neg_new += y_ref
if len(contours_header) != None: # assert not len(cy_main) or np.min(peaks_neg_new) <= np.min(cy_main) and np.max(cy_main) <= np.max(peaks_neg_new)
areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))]) # assert not len(cy_head) or np.min(peaks_neg_new) <= np.min(cy_head) and np.max(cy_head) <= np.max(peaks_neg_new)
M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))]
cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) matrix_of_orders = np.zeros((len(contours_main) + len(contours_head), 5), dtype=int)
x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_head))
y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
# print(cy_main,'mainy')
peaks_neg_new = []
peaks_neg_new.append(0 + y_ref)
for iii in range(len(peaks_neg)):
peaks_neg_new.append(peaks_neg[iii] + y_ref)
peaks_neg_new.append(textline_mask.shape[0] + y_ref)
if len(cy_main) > 0 and np.max(cy_main) > np.max(peaks_neg_new):
cy_main = np.array(cy_main) * (np.max(peaks_neg_new) / np.max(cy_main)) - 10
if contours_main != None:
indexer_main = np.arange(len(contours_main))
if contours_main != None:
len_main = len(contours_main)
else:
len_main = 0
matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5))
matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header))
matrix_of_orders[: len(contours_main), 1] = 1 matrix_of_orders[: len(contours_main), 1] = 1
matrix_of_orders[len(contours_main) :, 1] = 2 matrix_of_orders[len(contours_main) :, 1] = 2
matrix_of_orders[: len(contours_main), 2] = cx_main matrix_of_orders[: len(contours_main), 2] = cx_main
matrix_of_orders[len(contours_main) :, 2] = cx_header matrix_of_orders[len(contours_main) :, 2] = cx_head
matrix_of_orders[: len(contours_main), 3] = cy_main matrix_of_orders[: len(contours_main), 3] = cy_main
matrix_of_orders[len(contours_main) :, 3] = cy_header matrix_of_orders[len(contours_main) :, 3] = cy_head
matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main)) matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main))
matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header)) matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_head))
# print(peaks_neg_new,'peaks_neg_new') # print(peaks_neg_new,'peaks_neg_new')
# print(matrix_of_orders,'matrix_of_orders') # print(matrix_of_orders,'matrix_of_orders')
@ -1290,70 +1240,42 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
final_indexers_sorted = [] final_indexers_sorted = []
final_types = [] final_types = []
final_index_type = [] final_index_type = []
for i in range(len(peaks_neg_new) - 1): for top, bot in pairwise(peaks_neg_new):
top = peaks_neg_new[i] indexes_in, types_in, cxs_in, cys_in, typed_indexes_in = \
down = peaks_neg_new[i + 1] matrix_of_orders[(matrix_of_orders[:, 3] >= top) &
indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & (matrix_of_orders[:, 3] < bot)].T
((matrix_of_orders[:, 3] < down))]
cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) &
((matrix_of_orders[:, 3] < down))]
cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) &
((matrix_of_orders[:, 3] < down))]
types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) &
(matrix_of_orders[:, 3] < down)]
index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) &
(matrix_of_orders[:, 3] < down)]
sorted_inside = np.argsort(cxs_in) sorted_inside = np.argsort(cxs_in)
ind_in_int = indexes_in[sorted_inside] final_indexers_sorted.extend(indexes_in[sorted_inside])
ind_in_type = types_of_text[sorted_inside] final_types.extend(types_in[sorted_inside])
ind_ind_type = index_types_of_text[sorted_inside] final_index_type.extend(typed_indexes_in[sorted_inside])
for j in range(len(ind_in_int)):
final_indexers_sorted.append(int(ind_in_int[j]))
final_types.append(int(ind_in_type[j]))
final_index_type.append(int(ind_ind_type[j]))
##matrix_of_orders[:len_main,4]=final_indexers_sorted[:] ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
# This fix is applied if the sum of the lengths of contours and contours_h # assert len(final_indexers_sorted) == len(contours_main) + len(contours_head)
# does not match final_indexers_sorted. However, this is not the optimal solution.. # assert not len(final_indexers_sorted) or max(final_index_type) == max(len(contours_main)
if len(cy_main) + len(cy_header) == len(final_index_type):
pass
else:
indexes_missed = set(np.arange(len(cy_main) + len(cy_header))) - set(final_indexers_sorted)
for ind_missed in indexes_missed:
final_indexers_sorted.append(ind_missed)
final_types.append(1)
final_index_type.append(ind_missed)
return final_indexers_sorted, matrix_of_orders, final_types, final_index_type return np.array(final_indexers_sorted), np.array(final_types), np.array(final_index_type)
def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
img_p_in_ver, img_in_hor,num_col_classifier): img_p_in_ver, img_in_hor,num_col_classifier):
#img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2) #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2)
img_p_in_ver=img_p_in_ver.astype(np.uint8) _, thresh = cv2.threshold(img_p_in_ver, 0, 255, 0)
img_p_in_ver=np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2) contours_lines_ver, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
imgray = cv2.cvtColor(img_p_in_ver, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_lines_ver,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
slope_lines_ver, _, x_min_main_ver, _, _, _, y_min_main_ver, y_max_main_ver, cx_main_ver = \ slope_lines_ver, _, x_min_main_ver, _, _, _, y_min_main_ver, y_max_main_ver, cx_main_ver = \
find_features_of_lines(contours_lines_ver) find_features_of_lines(contours_lines_ver)
for i in range(len(x_min_main_ver)): for i in range(len(x_min_main_ver)):
img_p_in_ver[int(y_min_main_ver[i]): img_p_in_ver[int(y_min_main_ver[i]):
int(y_min_main_ver[i])+30, int(y_min_main_ver[i])+30,
int(cx_main_ver[i])-25: int(cx_main_ver[i])-25:
int(cx_main_ver[i])+25, 0] = 0 int(cx_main_ver[i])+25] = 0
img_p_in_ver[int(y_max_main_ver[i])-30: img_p_in_ver[int(y_max_main_ver[i])-30:
int(y_max_main_ver[i]), int(y_max_main_ver[i]),
int(cx_main_ver[i])-25: int(cx_main_ver[i])-25:
int(cx_main_ver[i])+25, 0] = 0 int(cx_main_ver[i])+25] = 0
img_in_hor=img_in_hor.astype(np.uint8) _, thresh = cv2.threshold(img_in_hor, 0, 255, 0)
img_in_hor=np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2) contours_lines_hor, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
imgray = cv2.cvtColor(img_in_hor, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_lines_hor,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, _, _, _, _ = \ slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, _, _, _, _ = \
find_features_of_lines(contours_lines_hor) find_features_of_lines(contours_lines_hor)
@ -1409,22 +1331,19 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
img_p_in=img_in_hor img_p_in=img_in_hor
special_separators=[] special_separators=[]
img_p_in_ver[:,:,0][img_p_in_ver[:,:,0]==255]=1 img_p_in_ver[img_p_in_ver == 255] = 1
sep_ver_hor = img_p_in + img_p_in_ver sep_ver_hor = img_p_in + img_p_in_ver
sep_ver_hor_cross=(sep_ver_hor[:,:,0]==2)*1 sep_ver_hor_cross = (sep_ver_hor == 2) * 1
sep_ver_hor_cross=np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2) _, thresh = cv2.threshold(sep_ver_hor_cross.astype(np.uint8), 0, 255, 0)
sep_ver_hor_cross=sep_ver_hor_cross.astype(np.uint8) contours_cross, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY) center_cross = np.array(find_center_of_contours(contours_cross), dtype=int)
ret, thresh = cv2.threshold(imgray, 0, 255, 0) for cx, cy in center_cross.T:
contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) img_p_in[cy - 30: cy + 30, cx + 5: cx + 40] = 0
cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross) img_p_in[cy - 30: cy + 30, cx - 40: cx - 4] = 0
for ii in range(len(cx_cross)):
img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0
img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0
else: else:
img_p_in=np.copy(img_in_hor) img_p_in=np.copy(img_in_hor)
special_separators=[] special_separators=[]
return img_p_in[:,:,0], special_separators return img_p_in, special_separators
def return_points_with_boundies(peaks_neg_fin, first_point, last_point): def return_points_with_boundies(peaks_neg_fin, first_point, last_point):
peaks_neg_tot = [] peaks_neg_tot = []
@ -1434,11 +1353,11 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point):
peaks_neg_tot.append(last_point) peaks_neg_tot.append(last_point)
return peaks_neg_tot return peaks_neg_tot
def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, label_lines, contours_h=None):
t_ins_c0 = time.time() t_ins_c0 = time.time()
separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1 separators_closeup=( (region_pre_p[:,:]==label_lines))*1
separators_closeup[0:110,:,:]=0 separators_closeup[0:110,:]=0
separators_closeup[separators_closeup.shape[0]-150:,:,:]=0 separators_closeup[separators_closeup.shape[0]-150:,:]=0
kernel = np.ones((5,5),np.uint8) kernel = np.ones((5,5),np.uint8)
separators_closeup=separators_closeup.astype(np.uint8) separators_closeup=separators_closeup.astype(np.uint8)
@ -1450,15 +1369,11 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
separators_closeup_n=separators_closeup_n.astype(np.uint8) separators_closeup_n=separators_closeup_n.astype(np.uint8)
separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) )
separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] separators_closeup_n_binary[:,:]=separators_closeup_n[:,:]
separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1
gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) _, thresh_e = cv2.threshold(separators_closeup_n_binary, 0, 255, 0)
gray_early=gray_early.astype(np.uint8) contours_line_e, _ = cv2.findContours(thresh_e.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY)
ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0)
contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
_, dist_xe, _, _, _, _, y_min_main, y_max_main, _ = \ _, dist_xe, _, _, _, _, y_min_main, y_max_main, _ = \
find_features_of_lines(contours_line_e) find_features_of_lines(contours_line_e)
dist_ye = y_max_main - y_min_main dist_ye = y_max_main - y_min_main
@ -1468,10 +1383,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
cnts_hor_e=[] cnts_hor_e=[]
for ce in args_hor_e: for ce in args_hor_e:
cnts_hor_e.append(contours_line_e[ce]) cnts_hor_e.append(contours_line_e[ce])
figs_e=np.zeros(thresh_e.shape)
figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1))
separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=(0,0,0)) separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=0)
gray = cv2.bitwise_not(separators_closeup_n_binary) gray = cv2.bitwise_not(separators_closeup_n_binary)
gray=gray.astype(np.uint8) gray=gray.astype(np.uint8)
@ -1491,7 +1404,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
kernel = np.ones((5,5),np.uint8) kernel = np.ones((5,5),np.uint8)
horizontal = cv2.dilate(horizontal,kernel,iterations = 2) horizontal = cv2.dilate(horizontal,kernel,iterations = 2)
horizontal = cv2.erode(horizontal,kernel,iterations = 2) horizontal = cv2.erode(horizontal,kernel,iterations = 2)
horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=(255,255,255)) horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=255)
rows = vertical.shape[0] rows = vertical.shape[0]
verticalsize = rows // 30 verticalsize = rows // 30
@ -1509,13 +1422,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
separators_closeup_new[:,:][vertical[:,:]!=0]=1 separators_closeup_new[:,:][vertical[:,:]!=0]=1
separators_closeup_new[:,:][horizontal[:,:]!=0]=1 separators_closeup_new[:,:][horizontal[:,:]!=0]=1
vertical=np.repeat(vertical[:, :, np.newaxis], 3, axis=2) _, thresh = cv2.threshold(vertical, 0, 255, 0)
vertical=vertical.astype(np.uint8) contours_line_vers, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
imgray = cv2.cvtColor(vertical, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \
find_features_of_lines(contours_line_vers) find_features_of_lines(contours_line_vers)
@ -1530,11 +1438,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
dist_y_ver=y_max_main_ver-y_min_main_ver dist_y_ver=y_max_main_ver-y_min_main_ver
len_y=separators_closeup.shape[0]/3.0 len_y=separators_closeup.shape[0]/3.0
horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) _, thresh = cv2.threshold(horizontal, 0, 255, 0)
horizontal=horizontal.astype(np.uint8) contours_line_hors, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_line_hors,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \
find_features_of_lines(contours_line_hors) find_features_of_lines(contours_line_hors)
@ -1627,7 +1532,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
peaks_neg_fin_fin=[] peaks_neg_fin_fin=[]
for itiles in args_big_parts: for itiles in args_big_parts:
regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]): regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]):
int(splitter_y_new[itiles+1]),:,0] int(splitter_y_new[itiles+1]),:]
try: try:
num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile,
num_col_classifier, tables, multiplier=7.0) num_col_classifier, tables, multiplier=7.0)

View file

@ -36,14 +36,8 @@ def find_contours_mean_y_diff(contours_main):
return np.mean(np.diff(np.sort(np.array(cy_main)))) return np.mean(np.diff(np.sort(np.array(cy_main))))
def get_text_region_boxes_by_given_contours(contours): def get_text_region_boxes_by_given_contours(contours):
boxes = [] return [cv2.boundingRect(contour)
contours_new = [] for contour in contours]
for jj in range(len(contours)):
box = cv2.boundingRect(contours[jj])
boxes.append(box)
contours_new.append(contours[jj])
return boxes, contours_new
def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
found_polygons_early = [] found_polygons_early = []
@ -79,61 +73,37 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.
found_polygons_early.append(polygon2contour(polygon)) found_polygons_early.append(polygon2contour(polygon))
return found_polygons_early return found_polygons_early
def find_new_features_of_contours(contours_main): def find_center_of_contours(contours):
areas_main = np.array([cv2.contourArea(contours_main[j]) moments = [cv2.moments(contour) for contour in contours]
for j in range(len(contours_main))]) cx = [feat["m10"] / (feat["m00"] + 1e-32)
M_main = [cv2.moments(contours_main[j]) for feat in moments]
for j in range(len(contours_main))] cy = [feat["m01"] / (feat["m00"] + 1e-32)
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for feat in moments]
for j in range(len(M_main))] return cx, cy
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32))
for j in range(len(M_main))]
try:
x_min_main = np.array([np.min(contours_main[j][:, 0, 0])
for j in range(len(contours_main))])
argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0])
for j in range(len(contours_main))])
x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0]
for j in range(len(contours_main))])
y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1]
for j in range(len(contours_main))])
x_max_main = np.array([np.max(contours_main[j][:, 0, 0])
for j in range(len(contours_main))])
y_min_main = np.array([np.min(contours_main[j][:, 0, 1])
for j in range(len(contours_main))])
y_max_main = np.array([np.max(contours_main[j][:, 0, 1])
for j in range(len(contours_main))])
except:
x_min_main = np.array([np.min(contours_main[j][:, 0])
for j in range(len(contours_main))])
argmin_x_main = np.array([np.argmin(contours_main[j][:, 0])
for j in range(len(contours_main))])
x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0]
for j in range(len(contours_main))])
y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1]
for j in range(len(contours_main))])
x_max_main = np.array([np.max(contours_main[j][:, 0])
for j in range(len(contours_main))])
y_min_main = np.array([np.min(contours_main[j][:, 1])
for j in range(len(contours_main))])
y_max_main = np.array([np.max(contours_main[j][:, 1])
for j in range(len(contours_main))])
# dis_x=np.abs(x_max_main-x_min_main)
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin def find_new_features_of_contours(contours):
# areas = np.array([cv2.contourArea(contour) for contour in contours])
cx, cy = find_center_of_contours(contours)
slice_x = np.index_exp[:, 0, 0]
slice_y = np.index_exp[:, 0, 1]
if any(contour.ndim < 3 for contour in contours):
slice_x = np.index_exp[:, 0]
slice_y = np.index_exp[:, 1]
x_min = np.array([np.min(contour[slice_x]) for contour in contours])
x_max = np.array([np.max(contour[slice_x]) for contour in contours])
y_min = np.array([np.min(contour[slice_y]) for contour in contours])
y_max = np.array([np.max(contour[slice_y]) for contour in contours])
# dis_x=np.abs(x_max-x_min)
y_corr_x_min = np.array([contour[np.argmin(contour[slice_x])][slice_y[1:]]
for contour in contours])
def find_features_of_contours(contours_main): return cx, cy, x_min, x_max, y_min, y_max, y_corr_x_min
areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))]
cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))]
cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))]
x_min_main=np.array([np.min(contours_main[j][:,0,0]) for j in range(len(contours_main))])
x_max_main=np.array([np.max(contours_main[j][:,0,0]) for j in range(len(contours_main))])
y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) def find_features_of_contours(contours):
y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) y_min = np.array([np.min(contour[:,0,1]) for contour in contours])
y_max = np.array([np.max(contour[:,0,1]) for contour in contours])
return y_min_main, y_max_main return y_min, y_max
def return_parent_contours(contours, hierarchy): def return_parent_contours(contours, hierarchy):
contours_parent = [contours[i] contours_parent = [contours[i]
@ -143,14 +113,11 @@ def return_parent_contours(contours, hierarchy):
def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if region_pre_p.ndim == 3:
cnts_images = (region_pre_p[:, :, 0] == label) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_imgs = return_parent_contours(contours_imgs, hierarchy) contours_imgs = return_parent_contours(contours_imgs, hierarchy)
@ -159,13 +126,11 @@ def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
return contours_imgs return contours_imgs
def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): def do_work_of_contours_in_image(contour, index_r_con, img, slope_first):
img_copy = np.zeros(img.shape) img_copy = np.zeros(img.shape[:2], dtype=np.uint8)
img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1)) img_copy = cv2.fillPoly(img_copy, pts=[contour], color=1)
img_copy = rotation_image_new(img_copy, -slope_first) img_copy = rotation_image_new(img_copy, -slope_first)
img_copy = img_copy.astype(np.uint8) _, thresh = cv2.threshold(img_copy, 0, 255, 0)
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
@ -188,8 +153,8 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
cnts_org = [] cnts_org = []
# print(cnts,'cnts') # print(cnts,'cnts')
for i in range(len(cnts)): for i in range(len(cnts)):
img_copy = np.zeros(img.shape) img_copy = np.zeros(img.shape[:2], dtype=np.uint8)
img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=1)
# plt.imshow(img_copy) # plt.imshow(img_copy)
# plt.show() # plt.show()
@ -200,9 +165,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
# plt.imshow(img_copy) # plt.imshow(img_copy)
# plt.show() # plt.show()
img_copy = img_copy.astype(np.uint8) _, thresh = cv2.threshold(img_copy, 0, 255, 0)
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
@ -219,12 +182,11 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first):
interpolation=cv2.INTER_NEAREST) interpolation=cv2.INTER_NEAREST)
cnts_org = [] cnts_org = []
for cnt in cnts: for cnt in cnts:
img_copy = np.zeros(img.shape) img_copy = np.zeros(img.shape[:2], dtype=np.uint8)
img_copy = cv2.fillPoly(img_copy, pts=[(cnt / zoom).astype(int)], color=(1, 1, 1)) img_copy = cv2.fillPoly(img_copy, pts=[cnt // zoom], color=1)
img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8)
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(img_copy, 0, 255, 0)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
@ -234,14 +196,13 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first):
return cnts_org return cnts_org
def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first, confidence_matrix): def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first, confidence_matrix):
img_copy = np.zeros(img.shape) img_copy = np.zeros(img.shape[:2], dtype=np.uint8)
img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=1)
confidence_matrix_mapped_with_contour = confidence_matrix * img_copy[:,:,0] confidence_matrix_mapped_with_contour = confidence_matrix * img_copy
confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy[:,:,0])) confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy))
img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8)
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(img_copy, 0, 255, 0)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
if len(cont_int)==0: if len(cont_int)==0:
@ -255,7 +216,7 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix): def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
if not len(cnts): if not len(cnts):
return [], [] return []
confidence_matrix = cv2.resize(confidence_matrix, confidence_matrix = cv2.resize(confidence_matrix,
(img.shape[1] // 6, img.shape[0] // 6), (img.shape[1] // 6, img.shape[0] // 6),
@ -265,18 +226,15 @@ def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
cnt_mask = np.zeros(confidence_matrix.shape) cnt_mask = np.zeros(confidence_matrix.shape)
cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0) cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0)
confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
return cnts, confs return confs
def return_contours_of_interested_textline(region_pre_p, label): def return_contours_of_interested_textline(region_pre_p, label):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if region_pre_p.ndim == 3:
cnts_images = (region_pre_p[:, :, 0] == label) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_imgs = return_parent_contours(contours_imgs, hierarchy) contours_imgs = return_parent_contours(contours_imgs, hierarchy)
@ -286,54 +244,15 @@ def return_contours_of_interested_textline(region_pre_p, label):
def return_contours_of_image(image): def return_contours_of_image(image):
if len(image.shape) == 2: if len(image.shape) == 2:
image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
image = image.astype(np.uint8) image = image.astype(np.uint8)
imgray = image
else: else:
image = image.astype(np.uint8) image = image.astype(np.uint8)
imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0) _, thresh = cv2.threshold(imgray, 0, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return contours, hierarchy return contours, hierarchy
def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003):
    """Extract the contours of all pixels carrying ``label``, dropping small ones.

    Args:
        region_pre_p: label map, either 2-D or 3-D; for a 3-D array only
            channel 0 is inspected.
        label: pixel value selecting the class of interest.
        min_size: forwarded as ``min_area`` to
            ``filter_contours_area_of_image_tables`` (presumably a fraction
            of the image area - TODO confirm against that helper).

    Returns:
        Whatever ``filter_contours_area_of_image_tables`` returns for the
        selected contours (a list of contours).
    """
    # binary mask of the requested label
    if region_pre_p.ndim == 3:
        mask = region_pre_p[:, :, 0] == label
    else:
        mask = region_pre_p == label

    # Threshold the single-channel mask directly: stacking it to 3 channels
    # only to convert it back to gray (as done previously) is a no-op.
    _, thresh = cv2.threshold(mask.astype(np.uint8), 0, 255, 0)

    contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # NOTE(review): presumably keeps only top-level (parentless) contours -
    # confirm against return_parent_contours
    contours_imgs = return_parent_contours(contours_imgs, hierarchy)
    contours_imgs = filter_contours_area_of_image_tables(
        thresh, contours_imgs, hierarchy, max_area=1, min_area=min_size)

    return contours_imgs
def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area):
    """Rasterise the size-filtered contours of all pixels carrying ``label``.

    Args:
        region_pre_p: label map, either 2-D or 3-D; for a 3-D array only
            channel 0 is inspected.
        label: pixel value selecting the class of interest.
        min_area: forwarded to ``filter_contours_area_of_image_tables``.
        max_area: forwarded to ``filter_contours_area_of_image_tables``.

    Returns:
        A 2-D float array with the same height and width as ``region_pre_p``,
        1 inside the retained contours and 0 elsewhere.
    """
    # binary mask of the requested label
    if region_pre_p.ndim == 3:
        mask = region_pre_p[:, :, 0] == label
    else:
        mask = region_pre_p == label

    # Threshold the single-channel mask directly: stacking it to 3 channels
    # only to convert it back to gray (as done previously) is a no-op.
    _, thresh = cv2.threshold(mask.astype(np.uint8), 0, 255, 0)

    contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # NOTE(review): presumably keeps only top-level (parentless) contours -
    # confirm against return_parent_contours
    contours_imgs = return_parent_contours(contours_imgs, hierarchy)
    contours_imgs = filter_contours_area_of_image_tables(
        thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area)

    # Draw on a single-channel canvas; only one channel was ever returned.
    img_ret = np.zeros(region_pre_p.shape[:2])
    img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=1)

    return img_ret
def dilate_textline_contours(all_found_textline_polygons): def dilate_textline_contours(all_found_textline_polygons):
return [[polygon2contour(contour2polygon(contour, dilate=6)) return [[polygon2contour(contour2polygon(contour, dilate=6))
for contour in region] for contour in region]
@ -359,6 +278,21 @@ def polygon2contour(polygon: Polygon) -> np.ndarray:
polygon = np.array(polygon.exterior.coords[:-1], dtype=int) polygon = np.array(polygon.exterior.coords[:-1], dtype=int)
return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis] return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis]
def make_intersection(poly1, poly2):
    """Intersect two geometries and reduce the result to a single polygon.

    Args:
        poly1: geometry providing ``intersection`` (presumably a
            shapely Polygon - TODO confirm against callers).
        poly2: geometry to intersect ``poly1`` with.

    Returns:
        None if the intersection is empty or has zero area; otherwise the
        intersection as one polygon, passed through ``make_valid``.

    Raises:
        AssertionError: if the post-processed intersection is not of
            geometry type 'Polygon' (the message carries its WKT).
    """
    overlap = poly1.intersection(poly2)

    # nothing (or only lines/points) in common
    if overlap.is_empty or overlap.area == 0.0:
        return None

    if overlap.geom_type == 'GeometryCollection':
        # heterogeneous result: filter zero-area shapes (LineString, Point)
        solid_parts = [part for part in overlap.geoms if part.area > 0]
        overlap = unary_union(solid_parts)

    # checked after the union above, since that may itself yield a MultiPolygon
    if overlap.geom_type == 'MultiPolygon':
        # homogeneous result: construct convex hull to connect
        overlap = join_polygons(overlap.geoms)

    assert overlap.geom_type == 'Polygon', overlap.wkt
    return make_valid(overlap)
def make_valid(polygon: Polygon) -> Polygon: def make_valid(polygon: Polygon) -> Polygon:
"""Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
def isint(x): def isint(x):

View file

@ -1,6 +1,7 @@
import numpy as np import numpy as np
import cv2 import cv2
from .contour import ( from .contour import (
find_center_of_contours,
find_new_features_of_contours, find_new_features_of_contours,
return_contours_of_image, return_contours_of_image,
return_parent_contours, return_parent_contours,
@ -22,8 +23,8 @@ def adhere_drop_capital_region_into_corresponding_textline(
): ):
# print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape') # print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape')
# print(all_found_textline_polygons[3]) # print(all_found_textline_polygons[3])
cx_m, cy_m, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) cx_m, cy_m = find_center_of_contours(contours_only_text_parent)
cx_h, cy_h, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_h) cx_h, cy_h = find_center_of_contours(contours_only_text_parent_h)
cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contours(polygons_of_drop_capitals) cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contours(polygons_of_drop_capitals)
img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
@ -89,9 +90,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1 region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
# print(region_final,'region_final') # print(region_final,'region_final')
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
try: try:
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
# print(all_box_coord[j_cont]) # print(all_box_coord[j_cont])
# print(cx_t) # print(cx_t)
# print(cy_t) # print(cy_t)
@ -153,9 +154,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
# areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))]) # areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))])
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
try: try:
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
# print(all_box_coord[j_cont]) # print(all_box_coord[j_cont])
# print(cx_t) # print(cx_t)
# print(cy_t) # print(cy_t)
@ -208,7 +209,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
try: try:
# print(all_found_textline_polygons[j_cont][0]) # print(all_found_textline_polygons[j_cont][0])
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
# print(all_box_coord[j_cont]) # print(all_box_coord[j_cont])
# print(cx_t) # print(cx_t)
# print(cy_t) # print(cy_t)
@ -261,7 +262,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
else: else:
pass pass
##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) ##cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
###print(all_box_coord[j_cont]) ###print(all_box_coord[j_cont])
###print(cx_t) ###print(cx_t)
###print(cy_t) ###print(cy_t)
@ -315,9 +316,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1 region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
# print(region_final,'region_final') # print(region_final,'region_final')
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
try: try:
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
# print(all_box_coord[j_cont]) # print(all_box_coord[j_cont])
# print(cx_t) # print(cx_t)
# print(cy_t) # print(cy_t)
@ -375,12 +376,12 @@ def adhere_drop_capital_region_into_corresponding_textline(
# areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))]) # areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))])
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
# print(cx_t,'print') # print(cx_t,'print')
try: try:
# print(all_found_textline_polygons[j_cont][0]) # print(all_found_textline_polygons[j_cont][0])
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)])
# print(all_box_coord[j_cont]) # print(all_box_coord[j_cont])
# print(cx_t) # print(cx_t)
# print(cy_t) # print(cy_t)
@ -453,7 +454,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
#####try: #####try:
#####if len(contours_new_parent)==1: #####if len(contours_new_parent)==1:
######print(all_found_textline_polygons[j_cont][0]) ######print(all_found_textline_polygons[j_cont][0])
#####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont]) #####cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[j_cont])
######print(all_box_coord[j_cont]) ######print(all_box_coord[j_cont])
######print(cx_t) ######print(cx_t)
######print(cy_t) ######print(cy_t)

View file

@ -142,13 +142,12 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
rotation_matrix) rotation_matrix)
def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
(h, w) = img_patch.shape[:2] h, w = img_patch.shape[:2]
center = (w // 2, h // 2) center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, -thetha, 1.0) M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
x_d = M[0, 2] x_d = M[0, 2]
y_d = M[1, 2] y_d = M[1, 2]
thetha = thetha / 180. * np.pi rotation_matrix = M[:2, :2]
rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
contour_text_interest_copy = contour_text_interest.copy() contour_text_interest_copy = contour_text_interest.copy()
x_cont = contour_text_interest[:, 0, 0] x_cont = contour_text_interest[:, 0, 0]
@ -1302,19 +1301,16 @@ def separate_lines_new_inside_tiles(img_path, thetha):
def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines): def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines):
kernel = np.ones((5, 5), np.uint8) kernel = np.ones((5, 5), np.uint8)
pixel = 255 label = 255
min_area = 0 min_area = 0
max_area = 1 max_area = 1
if len(img_patch.shape) == 3: if img_patch.ndim == 3:
cnts_images = (img_patch[:, :, 0] == pixel) * 1 cnts_images = (img_patch[:, :, 0] == label) * 1
else: else:
cnts_images = (img_patch[:, :] == pixel) * 1 cnts_images = (img_patch[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) contours_imgs, hierarchy = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_imgs = return_parent_contours(contours_imgs, hierarchy) contours_imgs = return_parent_contours(contours_imgs, hierarchy)
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs = filter_contours_area_of_image_tables(thresh,
@ -1322,14 +1318,12 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
max_area=max_area, min_area=min_area) max_area=max_area, min_area=min_area)
cont_final = [] cont_final = []
for i in range(len(contours_imgs)): for i in range(len(contours_imgs)):
img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3)) img_contour = np.zeros(cnts_images.shape[:2], dtype=np.uint8)
img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255)) img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=255)
img_contour = img_contour.astype(np.uint8)
img_contour = cv2.dilate(img_contour, kernel, iterations=4) img_contour = cv2.dilate(img_contour, kernel, iterations=4)
imgrayrot = cv2.cvtColor(img_contour, cv2.COLOR_BGR2GRAY) _, threshrot = cv2.threshold(img_contour, 0, 255, 0)
_, threshrot = cv2.threshold(imgrayrot, 0, 255, 0) contours_text_rot, _ = cv2.findContours(threshrot.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
##contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[ ##contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
##0] ##0]
@ -1344,24 +1338,22 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
def textline_contours_postprocessing(textline_mask, slope, def textline_contours_postprocessing(textline_mask, slope,
contour_text_interest, box_ind, contour_text_interest, box_ind,
add_boxes_coor_into_textlines=False): add_boxes_coor_into_textlines=False):
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = textline_mask * 255
textline_mask = textline_mask.astype(np.uint8)
kernel = np.ones((5, 5), np.uint8) kernel = np.ones((5, 5), np.uint8)
textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel)
textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel)
textline_mask = cv2.erode(textline_mask, kernel, iterations=2) textline_mask = cv2.erode(textline_mask, kernel, iterations=2)
# textline_mask = cv2.erode(textline_mask, kernel, iterations=1) # textline_mask = cv2.erode(textline_mask, kernel, iterations=1)
try:
x_help = 30 x_help = 30
y_help = 2 y_help = 2
textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help),
textline_mask.shape[1] + int(2 * x_help), 3)) textline_mask.shape[1] + int(2 * x_help)))
textline_mask_help[y_help : y_help + textline_mask.shape[0], textline_mask_help[y_help : y_help + textline_mask.shape[0],
x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) x_help : x_help + textline_mask.shape[1]] = np.copy(textline_mask[:, :])
dst = rotate_image(textline_mask_help, slope) dst = rotate_image(textline_mask_help, slope)
dst = dst[:, :, 0]
dst[dst != 0] = 1 dst[dst != 0] = 1
# if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3: # if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3:
@ -1372,21 +1364,18 @@ def textline_contours_postprocessing(textline_mask, slope,
contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0] contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0]
contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
img_contour = np.zeros((box_ind[3], box_ind[2], 3)) img_contour = np.zeros((box_ind[3], box_ind[2]))
img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255)) img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=255)
img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help),
img_contour.shape[1] + int(2 * x_help), 3)) img_contour.shape[1] + int(2 * x_help)))
img_contour_help[y_help : y_help + img_contour.shape[0], img_contour_help[y_help : y_help + img_contour.shape[0],
x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) x_help : x_help + img_contour.shape[1]] = np.copy(img_contour[:, :])
img_contour_rot = rotate_image(img_contour_help, slope) img_contour_rot = rotate_image(img_contour_help, slope)
img_contour_rot = img_contour_rot.astype(np.uint8) _, threshrot = cv2.threshold(img_contour_rot, 0, 255, 0)
# dst_help = dst_help.astype(np.uint8) contours_text_rot, _ = cv2.findContours(threshrot.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
imgrayrot = cv2.cvtColor(img_contour_rot, cv2.COLOR_BGR2GRAY)
_, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))] len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))]
ind_big_con = np.argmax(len_con_text_rot) ind_big_con = np.argmax(len_con_text_rot)
@ -1398,8 +1387,6 @@ def textline_contours_postprocessing(textline_mask, slope,
else: else:
_, contours_rotated_clean = separate_lines( _, contours_rotated_clean = separate_lines(
dst, contours_text_rot[ind_big_con], slope, x_help, y_help) dst, contours_text_rot[ind_big_con], slope, x_help, y_help)
except:
contours_rotated_clean = []
return contours_rotated_clean return contours_rotated_clean
@ -1605,7 +1592,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
@wrap_ndarray_shared(kw='textline_mask_tot_ea') @wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new( def do_work_of_slopes_new(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par,
textline_mask_tot_ea=None, slope_deskew=0.0, textline_mask_tot_ea=None, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
): ):
@ -1660,12 +1647,12 @@ def do_work_of_slopes_new(
all_text_region_raw[mask_only_con_region == 0] = 0 all_text_region_raw[mask_only_con_region == 0] = 0
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text) cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text)
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope return cnt_clean_rot, crop_coor, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea') @wrap_ndarray_shared(kw='textline_mask_tot_ea')
@wrap_ndarray_shared(kw='mask_texts_only') @wrap_ndarray_shared(kw='mask_texts_only')
def do_work_of_slopes_new_curved( def do_work_of_slopes_new_curved(
box_text, contour, contour_par, index_r_con, box_text, contour_par,
textline_mask_tot_ea=None, mask_texts_only=None, textline_mask_tot_ea=None, mask_texts_only=None,
num_col=1, scale_par=1.0, slope_deskew=0.0, num_col=1, scale_par=1.0, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
@ -1756,11 +1743,11 @@ def do_work_of_slopes_new_curved(
slope_for_all, contour_par, slope_for_all, contour_par,
box_text, True) box_text, True)
return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope return textlines_cnt_per_region[::-1], crop_coor, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea') @wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new_light( def do_work_of_slopes_new_light(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par,
textline_mask_tot_ea=None, slope_deskew=0, textline_light=True, textline_mask_tot_ea=None, slope_deskew=0, textline_light=True,
logger=None logger=None
): ):
@ -1790,4 +1777,4 @@ def do_work_of_slopes_new_light(
all_text_region_raw[mask_only_con_region == 0] = 0 all_text_region_raw[mask_only_con_region == 0] = 0
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text) cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text)
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope_deskew return cnt_clean_rot, crop_coor, slope_deskew

View file

@ -1,13 +1,17 @@
import math
import copy
import numpy as np import numpy as np
import cv2 import cv2
import tensorflow as tf import tensorflow as tf
from scipy.signal import find_peaks from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d from scipy.ndimage import gaussian_filter1d
import math
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
from Bio import pairwise2 from Bio import pairwise2
from .resize import resize_image from .resize import resize_image
def decode_batch_predictions(pred, num_to_char, max_len = 128): def decode_batch_predictions(pred, num_to_char, max_len = 128):
# input_len is the product of the batch size and the # input_len is the product of the batch size and the
# number of time steps. # number of time steps.
@ -370,7 +374,9 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind
return textline_contour return textline_contour
def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, def return_rnn_cnn_ocr_of_given_textlines(image,
all_found_textline_polygons,
all_box_coord,
prediction_model, prediction_model,
b_s_ocr, num_to_char, b_s_ocr, num_to_char,
textline_light=False, textline_light=False,

View file

@ -57,19 +57,15 @@ def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_margina
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
region_counter.inc('region') region_counter.inc('region')
for idx_textregion, _ in enumerate(order_of_texts): for idx_textregion in order_of_texts:
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(order_of_texts[idx_textregion] + 1))) og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(idx_textregion + 1)))
region_counter.inc('region') region_counter.inc('region')
for id_marginal in id_of_marginalia_right: for id_marginal in id_of_marginalia_right:
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
region_counter.inc('region') region_counter.inc('region')
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, indexes_sorted, index_of_types, kind_of_texts, ref_point):
indexes_sorted = np.array(indexes_sorted)
index_of_types = np.array(index_of_types)
kind_of_texts = np.array(kind_of_texts)
id_of_texts = [] id_of_texts = []
order_of_texts = [] order_of_texts = []

View file

@ -56,113 +56,30 @@ class EynollahXmlWriter():
points_page_print = points_page_print + ' ' points_page_print = points_page_print + ' '
return points_page_print[:-1] return points_page_print[:-1]
def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_all_textlines_textregion):
for j in range(len(all_found_textline_polygons_marginals[marginal_idx])):
coords = CoordsType()
textline = TextLineType(id=counter.next_line_id, Coords=coords)
if ocr_all_textlines_textregion:
textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] )
marginal_region.add_TextLine(textline)
marginal_region.set_orientation(-slopes_marginals[marginal_idx])
points_co = ''
for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])):
if not (self.curved_line or self.textline_light):
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
else:
textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
points_co += str(textline_x_coord)
points_co += ','
points_co += str(textline_y_coord)
if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45:
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y))
elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45:
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
points_co += ' '
coords.set_points(points_co[:-1])
def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion):
self.logger.debug('enter serialize_lines_in_region') self.logger.debug('enter serialize_lines_in_region')
for j in range(len(all_found_textline_polygons[region_idx])): for j, polygon_textline in enumerate(all_found_textline_polygons[region_idx]):
coords = CoordsType() coords = CoordsType()
textline = TextLineType(id=counter.next_line_id, Coords=coords) textline = TextLineType(id=counter.next_line_id, Coords=coords)
if ocr_all_textlines_textregion: if ocr_all_textlines_textregion:
# FIXME: add OCR confidence
textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])]) textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])])
text_region.add_TextLine(textline) text_region.add_TextLine(textline)
text_region.set_orientation(-slopes[region_idx]) text_region.set_orientation(-slopes[region_idx])
region_bboxes = all_box_coord[region_idx] region_bboxes = all_box_coord[region_idx]
points_co = '' points_co = ''
for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): for point in polygon_textline:
if not (self.curved_line or self.textline_light): if len(point) != 2:
if len(contour_textline) == 2: point = point[0]
textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) point_x = point[0] + page_coord[2]
textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) point_y = point[1] + page_coord[0]
else: # FIXME: or actually... not self.textline_light and not self.curved_line or np.abs(slopes[region_idx]) > 45?
textline_x_coord = max(0, int((contour_textline[0][0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) if not self.textline_light and not (self.curved_line and np.abs(slopes[region_idx]) <= 45):
textline_y_coord = max(0, int((contour_textline[0][1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) point_x += region_bboxes[2]
points_co += str(textline_x_coord) point_y += region_bboxes[0]
points_co += ',' point_x = max(0, int(point_x / self.scale_x))
points_co += str(textline_y_coord) point_y = max(0, int(point_y / self.scale_y))
points_co += str(point_x) + ',' + str(point_y) + ' '
if self.textline_light or (self.curved_line and np.abs(slopes[region_idx]) <= 45):
if len(contour_textline) == 2:
points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y))
elif self.curved_line and np.abs(slopes[region_idx]) > 45:
if len(contour_textline)==2:
points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x))
points_co += ','
points_co += str(int((contour_textline[1] + region_bboxes[0] + page_coord[0])/self.scale_y))
else:
points_co += str(int((contour_textline[0][0] + region_bboxes[2]+page_coord[2])/self.scale_x))
points_co += ','
points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y))
points_co += ' '
coords.set_points(points_co[:-1])
def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion):
self.logger.debug('enter serialize_lines_in_region')
for j in range(1):
coords = CoordsType()
textline = TextLineType(id=counter.next_line_id, Coords=coords)
if ocr_all_textlines_textregion:
textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] )
text_region.add_TextLine(textline)
#region_bboxes = all_box_coord[region_idx]
points_co = ''
for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[j]):
if len(contour_textline) == 2:
points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y))
points_co += ' '
coords.set_points(points_co[:-1]) coords.set_points(points_co[:-1])
def write_pagexml(self, pcgts): def write_pagexml(self, pcgts):
@ -170,8 +87,50 @@ class EynollahXmlWriter():
with open(self.output_filename, 'w') as f: with open(self.output_filename, 'w') as f:
f.write(to_xml(pcgts)) f.write(to_xml(pcgts))
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, conf_contours_textregion=None, skip_layout_reading_order=False): def build_pagexml_no_full_layout(
self.logger.debug('enter build_pagexml_no_full_layout') self, found_polygons_text_region,
page_coord, order_of_texts, id_of_texts,
all_found_textline_polygons,
all_box_coord,
found_polygons_text_region_img,
found_polygons_marginals_left, found_polygons_marginals_right,
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
all_box_coord_marginals_left, all_box_coord_marginals_right,
slopes, slopes_marginals_left, slopes_marginals_right,
cont_page, polygons_seplines,
found_polygons_tables,
**kwargs):
return self.build_pagexml_full_layout(
found_polygons_text_region, [],
page_coord, order_of_texts, id_of_texts,
all_found_textline_polygons, [],
all_box_coord, [],
found_polygons_text_region_img, found_polygons_tables, [],
found_polygons_marginals_left, found_polygons_marginals_right,
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
all_box_coord_marginals_left, all_box_coord_marginals_right,
slopes, [], slopes_marginals_left, slopes_marginals_right,
cont_page, polygons_seplines,
**kwargs)
def build_pagexml_full_layout(
self,
found_polygons_text_region, found_polygons_text_region_h,
page_coord, order_of_texts, id_of_texts,
all_found_textline_polygons, all_found_textline_polygons_h,
all_box_coord, all_box_coord_h,
found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals,
found_polygons_marginals_left,found_polygons_marginals_right,
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
all_box_coord_marginals_left, all_box_coord_marginals_right,
slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
cont_page, polygons_seplines,
ocr_all_textlines=None, ocr_all_textlines_h=None,
ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None,
ocr_all_textlines_drop=None,
conf_contours_textregions=None, conf_contours_textregions_h=None,
skip_layout_reading_order=False):
self.logger.debug('enter build_pagexml')
# create the file structure # create the file structure
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org) pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
@ -179,191 +138,116 @@ class EynollahXmlWriter():
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page)))) page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
counter = EynollahIdCounter() counter = EynollahIdCounter()
if len(found_polygons_text_region) > 0: if len(order_of_texts):
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left] id_of_marginalia_left = [_counter_marginals.next_region_id
id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] for _ in found_polygons_marginals_left]
id_of_marginalia_right = [_counter_marginals.next_region_id
for _ in found_polygons_marginals_right]
xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right)
for mm in range(len(found_polygons_text_region)): for mm, region_contour in enumerate(found_polygons_text_region):
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', textregion = TextRegionType(
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]), id=counter.next_region_id, type_='paragraph',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord,
skip_layout_reading_order))
) )
#textregion.set_conf(conf_contours_textregion[mm]) if conf_contours_textregions:
textregion.Coords.set_conf(conf_contours_textregions[mm])
page.add_TextRegion(textregion) page.add_TextRegion(textregion)
if ocr_all_textlines: if ocr_all_textlines:
ocr_textlines = ocr_all_textlines[mm] ocr_textlines = ocr_all_textlines[mm]
else: else:
ocr_textlines = None ocr_textlines = None
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord,
all_box_coord, slopes, counter, ocr_textlines)
for mm in range(len(found_polygons_marginals_left)):
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord)))
page.add_TextRegion(marginal)
if ocr_all_textlines_marginals_left:
ocr_textlines = ocr_all_textlines_marginals_left[mm]
else:
ocr_textlines = None
#print(ocr_textlines, mm, len(all_found_textline_polygons_marginals_left[mm]) )
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
for mm in range(len(found_polygons_marginals_right)):
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord)))
page.add_TextRegion(marginal)
if ocr_all_textlines_marginals_right:
ocr_textlines = ocr_all_textlines_marginals_right[mm]
else:
ocr_textlines = None
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
for mm in range(len(found_polygons_text_region_img)):
img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType())
page.add_ImageRegion(img_region)
points_co = ''
for lmm in range(len(found_polygons_text_region_img[mm])):
try:
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
points_co += ' '
except:
points_co += str(int((found_polygons_text_region_img[mm][lmm][0] + page_coord[2])/ self.scale_x ))
points_co += ','
points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0])/ self.scale_y ))
points_co += ' '
img_region.get_Coords().set_points(points_co[:-1])
for mm in range(len(polygons_lines_to_be_written_in_xml)):
sep_hor = SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType())
page.add_SeparatorRegion(sep_hor)
points_co = ''
for lmm in range(len(polygons_lines_to_be_written_in_xml[mm])):
points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,0] ) / self.scale_x))
points_co += ','
points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,1] ) / self.scale_y))
points_co += ' '
sep_hor.get_Coords().set_points(points_co[:-1])
for mm in range(len(found_polygons_tables)):
tab_region = TableRegionType(id=counter.next_region_id, Coords=CoordsType())
page.add_TableRegion(tab_region)
points_co = ''
for lmm in range(len(found_polygons_tables[mm])):
points_co += str(int((found_polygons_tables[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((found_polygons_tables[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
points_co += ' '
tab_region.get_Coords().set_points(points_co[:-1])
return pcgts
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals_left,found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None):
self.logger.debug('enter build_pagexml_full_layout')
# create the file structure
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
page = pcgts.get_Page()
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
counter = EynollahIdCounter()
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left]
id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right]
xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right)
for mm in range(len(found_polygons_text_region)):
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]))
page.add_TextRegion(textregion)
if ocr_all_textlines:
ocr_textlines = ocr_all_textlines[mm]
else:
ocr_textlines = None
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines)
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
for mm in range(len(found_polygons_text_region_h)): for mm, region_contour in enumerate(found_polygons_text_region_h):
textregion = TextRegionType(id=counter.next_region_id, type_='heading', textregion = TextRegionType(
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) id=counter.next_region_id, type_='heading',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
)
if conf_contours_textregions_h:
textregion.Coords.set_conf(conf_contours_textregions_h[mm])
page.add_TextRegion(textregion) page.add_TextRegion(textregion)
if ocr_all_textlines_h: if ocr_all_textlines_h:
ocr_textlines = ocr_all_textlines_h[mm] ocr_textlines = ocr_all_textlines_h[mm]
else: else:
ocr_textlines = None ocr_textlines = None
self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord,
all_box_coord_h, slopes_h, counter, ocr_textlines)
for mm in range(len(found_polygons_marginals_left)): for mm, region_contour in enumerate(found_polygons_marginals_left):
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', marginal = TextRegionType(
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) id=counter.next_region_id, type_='marginalia',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
)
page.add_TextRegion(marginal) page.add_TextRegion(marginal)
if ocr_all_textlines_marginals_left: if ocr_all_textlines_marginals_left:
ocr_textlines = ocr_all_textlines_marginals_left[mm] ocr_textlines = ocr_all_textlines_marginals_left[mm]
else: else:
ocr_textlines = None ocr_textlines = None
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
for mm in range(len(found_polygons_marginals_right)): for mm, region_contour in enumerate(found_polygons_marginals_right):
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', marginal = TextRegionType(
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord))) id=counter.next_region_id, type_='marginalia',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
)
page.add_TextRegion(marginal) page.add_TextRegion(marginal)
if ocr_all_textlines_marginals_right: if ocr_all_textlines_marginals_right:
ocr_textlines = ocr_all_textlines_marginals_right[mm] ocr_textlines = ocr_all_textlines_marginals_right[mm]
else: else:
ocr_textlines = None ocr_textlines = None
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_right, mm, page_coord,
all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
for mm in range(len(found_polygons_drop_capitals)): for mm, region_contour in enumerate(found_polygons_drop_capitals):
dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', dropcapital = TextRegionType(
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))) id=counter.next_region_id, type_='drop-capital',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
)
page.add_TextRegion(dropcapital) page.add_TextRegion(dropcapital)
all_box_coord_drop = None all_box_coord_drop = [[0, 0, 0, 0]]
slopes_drop = None slopes_drop = [0]
if ocr_all_textlines_drop: if ocr_all_textlines_drop:
ocr_textlines = ocr_all_textlines_drop[mm] ocr_textlines = ocr_all_textlines_drop[mm]
else: else:
ocr_textlines = None ocr_textlines = None
self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=ocr_textlines) self.serialize_lines_in_region(dropcapital, [[found_polygons_drop_capitals[mm]]], 0, page_coord,
all_box_coord_drop, slopes_drop, counter, ocr_textlines)
for mm in range(len(found_polygons_text_region_img)): for region_contour in found_polygons_text_region_img:
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) page.add_ImageRegion(
ImageRegionType(id=counter.next_region_id,
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))))
for mm in range(len(polygons_lines_to_be_written_in_xml)): for region_contour in polygons_seplines:
page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) page.add_SeparatorRegion(
SeparatorRegionType(id=counter.next_region_id,
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, [0, 0, 0, 0]))))
for mm in range(len(found_polygons_tables)): for region_contour in found_polygons_tables:
page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) page.add_TableRegion(
TableRegionType(id=counter.next_region_id,
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))))
return pcgts return pcgts
def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
    """Serialize a contour into a ``x1,y1 x2,y2 ...`` points string.

    The result is what the callers in this module pass as
    ``CoordsType(points=...)``.

    Args:
        contour: iterable of points. A point is either a pair ``(x, y)``
            or a one-element wrapper around such a pair (``[[x, y]]``);
            presumably the latter is the OpenCV contour layout -- TODO confirm.
        page_coord: indexable offsets; ``page_coord[2]`` is added to every x
            and ``page_coord[0]`` to every y. Not accessed at all when
            ``skip_layout_reading_order`` is true.
        skip_layout_reading_order: if true, no offset is applied.

    Returns:
        The points joined by single spaces, each formatted as ``x,y``.
        An empty contour yields the empty string.
    """
    self.logger.debug('enter calculate_polygon_coords')
    # Resolve the offsets once instead of branching for every point.
    if skip_layout_reading_order:
        offset_x = offset_y = 0
    else:
        offset_x = page_coord[2]
        offset_y = page_coord[0]
    points = []
    for point in contour:
        if len(point) != 2:
            # wrapped point ([[x, y]]): unwrap to the inner pair
            point = point[0]
        # int() truncates towards zero after scaling, as before
        point_x = int((point[0] + offset_x) / self.scale_x)
        point_y = int((point[1] + offset_y) / self.scale_y)
        points.append(str(point_x) + ',' + str(point_y))
    # join replaces the former "append ' ' and strip the last one" idiom
    return ' '.join(points)

View file

@ -17,21 +17,26 @@ from ocrd_models.constants import NAMESPACES as NS
testdir = Path(__file__).parent.resolve() testdir = Path(__file__).parent.resolve()
MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve())) MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve()))
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve())) MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_1').resolve()))
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve())) MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
@pytest.mark.parametrize( @pytest.mark.parametrize(
"options", "options",
[ [
[], # defaults [], # defaults
["--allow_scaling", "--curved-line"], #["--allow_scaling", "--curved-line"],
["--allow_scaling", "--curved-line", "--full-layout"], ["--allow_scaling", "--curved-line", "--full-layout"],
["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"], ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"],
["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based", ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based",
"--textline_light", "--light_version"], "--textline_light", "--light_version"],
# -ep ... # -ep ...
# -eoi ... # -eoi ...
# --do_ocr # FIXME: find out whether OCR extra was installed, otherwise skip these
["--do_ocr"],
["--do_ocr", "--light_version", "--textline_light"],
["--do_ocr", "--transformer_ocr"],
#["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light"],
["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light", "--full-layout"],
# --skip_layout_and_reading_order # --skip_layout_and_reading_order
], ids=str) ], ids=str)
def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options): def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
@ -62,6 +67,44 @@ def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
lines = tree.xpath("//page:TextLine", namespaces=NS) lines = tree.xpath("//page:TextLine", namespaces=NS)
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
@pytest.mark.parametrize(
    "options",
    [
        ["--tables"],
        ["--tables", "--full-layout"],
        ["--tables", "--full-layout", "--textline_light", "--light_version"],
    ], ids=str)
def test_run_eynollah_layout_filename2(tmp_path, pytestconfig, caplog, options):
    """Run the layout CLI with table detection on a single page image.

    Invokes ``layout_cli`` on the ``euler_rechenkunst01_1738_0025.tif``
    resource for each option set, then checks that the input path was logged,
    that the output PAGE-XML file exists, and that it contains at least the
    expected minimum number of text, table and separator regions and of
    text lines.
    """
    infile = testdir.joinpath('resources/euler_rechenkunst01_1738_0025.tif')
    outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml'
    args = ['-m', MODELS_LAYOUT, '-i', str(infile), '-o', str(outfile.parent)]
    if pytestconfig.getoption('verbose') > 0:
        args += ['-l', 'DEBUG']
    caplog.set_level(logging.INFO)

    def only_eynollah(logrec):
        # keep only records emitted by the 'eynollah' logger
        return logrec.name == 'eynollah'

    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
    assert result.exit_code == 0, result.stdout
    assert str(infile) in [logrec.message for logrec in caplog.records]
    assert outfile.exists()

    tree = page_from_file(str(outfile)).etree
    minimum_counts = [
        ("//page:TextRegion", 2),
        # model/decoding is not very precise, so (depending on mode) we can get fractures/splits/FP
        ("//page:TableRegion", 1),
        ("//page:SeparatorRegion", 2),
        # mostly table (if detected correctly), but 1 page and 1 catch-word line
        ("//page:TextLine", 2),
    ]
    for xpath, minimum in minimum_counts:
        found = tree.xpath(xpath, namespaces=NS)
        assert len(found) >= minimum, "result is inaccurate"
def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
outdir = tmp_path outdir = tmp_path

View file

@ -2,6 +2,5 @@ def test_utils_import():
import eynollah.utils import eynollah.utils
import eynollah.utils.contour import eynollah.utils.contour
import eynollah.utils.drop_capitals import eynollah.utils.drop_capitals
import eynollah.utils.drop_capitals
import eynollah.utils.is_nan import eynollah.utils.is_nan
import eynollah.utils.rotate import eynollah.utils.rotate