Merge remote-tracking branch 'bertsky/loky-with-shm-for-175-rebuilt' into prepare-v0.6.0

kba 2025-10-01 20:27:56 +02:00
commit 96eb1c11e6
16 changed files with 1558 additions and 1312 deletions


@@ -24,24 +24,39 @@ jobs:
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -h
- uses: actions/checkout@v4
- uses: actions/cache@v4
- uses: actions/cache/restore@v4
id: seg_model_cache
with:
path: models_layout_v0_5_0
key: ${{ runner.os }}-models
- uses: actions/cache@v4
key: seg-models
- uses: actions/cache/restore@v4
id: ocr_model_cache
with:
path: models_ocr_v0_5_0
key: ${{ runner.os }}-models
- uses: actions/cache@v4
path: models_ocr_v0_5_1
key: ocr-models
- uses: actions/cache/restore@v4
id: bin_model_cache
with:
path: default-2021-03-09
key: ${{ runner.os }}-modelbin
key: bin-models
- name: Download models
if: steps.seg_model_cache.outputs.cache-hit != 'true' || steps.bin_model_cache.outputs.cache-hit != 'true' || steps.ocr_model_cache.outputs.cache-hit != 'true'
run: make models
- uses: actions/cache/save@v4
if: steps.seg_model_cache.outputs.cache-hit != 'true'
with:
path: models_layout_v0_5_0
key: seg-models
- uses: actions/cache/save@v4
if: steps.ocr_model_cache.outputs.cache-hit != 'true'
with:
path: models_ocr_v0_5_1
key: ocr-models
- uses: actions/cache/save@v4
if: steps.bin_model_cache.outputs.cache-hit != 'true'
with:
path: default-2021-03-09
key: bin-models
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:

.gitignore

@@ -2,7 +2,11 @@
__pycache__
sbb_newspapers_org_image/pylint.log
models_eynollah*
models_ocr*
models_layout*
default-2021-03-09
output.html
/build
/dist
*.tif
TAGS


@@ -5,6 +5,33 @@ Versioned according to [Semantic Versioning](http://semver.org/).
## Unreleased
Fixed:
* :fire: polygons: avoid invalid paths (use `Polygon.buffer()` instead of dilation etc.)
* `return_boxes_of_images_by_order_of_reading_new`: avoid Numpy.dtype mismatch, simplify
* `return_boxes_of_images_by_order_of_reading_new`: log any exceptions instead of ignoring
* `filter_contours_without_textline_inside`: avoid removing from duplicate lists twice
* `get_marginals`: exit early if no peaks found to avoid spurious overlap mask
* `get_smallest_skew`: after shifting search range of rotation angle, use overall best result
* Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR)
* OCR: re-instate missing methods and fix `utils_ocr` function calls
* :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`)
* tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate`
(so CUDA memory gets freed between tests if running on GPU)
Changed:
* polygons: slightly widen for regions and lines, increase for separators
* various refactorings, some code style and identifier improvements
* deskewing/multiprocessing: switch back to ProcessPoolExecutor (faster),
  but use shared memory if necessary, and switch back from `loky` to stdlib,
  and shut down in `__del__()` instead of via `atexit` (see the sketch after this list)
* :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too
* :fire: writer: use `@type='heading'` instead of `'header'` for headings
* CI: update+improve model caching
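
A minimal sketch of the shared-memory pattern referenced in the deskewing/multiprocessing entry above (the project's actual helpers are `share_ndarray` and `wrap_ndarray_shared` in the new `shm` module; all names below are illustrative): the array is written once into stdlib shared memory, and each ProcessPoolExecutor worker attaches to it by name instead of receiving a pickled copy.

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import shared_memory
import numpy as np

def _row_sum(shm_name, shape, dtype, row):
    # attach to the existing shared block by name (no copy, no pickling)
    shm = shared_memory.SharedMemory(name=shm_name)
    try:
        arr = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
        return float(arr[row].sum())  # placeholder per-row work
    finally:
        shm.close()

def parallel_row_sums(arr):
    shm = shared_memory.SharedMemory(create=True, size=arr.nbytes)
    try:
        np.ndarray(arr.shape, dtype=arr.dtype, buffer=shm.buf)[:] = arr
        n = arr.shape[0]
        with ProcessPoolExecutor() as pool:
            return list(pool.map(_row_sum, [shm.name] * n,
                                 [arr.shape] * n, [arr.dtype] * n, range(n)))
    finally:
        shm.close()
        shm.unlink()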
## [0.5.0] - 2025-09-26
Fixed:


@@ -40,6 +40,8 @@ RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename
RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json
# install everything and reduce image size
RUN make install EXTRAS=OCR && rm -rf /build/eynollah
# fixup for broken cuDNN installation (Torch pulls in 8.5.0, which is incompatible with Tensorflow)
RUN pip install nvidia-cudnn-cu11==8.6.0.163
# smoke test
RUN eynollah --help


@@ -13,12 +13,18 @@ DOCKER ?= docker
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)
BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
BIN_MODELFILE = $(notdir $(BIN_MODEL))
BIN_MODELNAME := default-2021-03-09
OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1
OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
PYTEST_ARGS ?= -vv
PYTEST_ARGS ?= -vv --isolate
# BEGIN-EVAL makefile-parser --make-help Makefile
@@ -31,7 +37,8 @@ help:
@echo " install Install package with pip"
@echo " install-dev Install editable with pip"
@echo " deps-test Install test dependencies with pip"
@echo " models Download and extract models to $(CURDIR)/models_layout_v0_5_0"
@echo " models Download and extract models to $(CURDIR):"
@echo " $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)"
@echo " smoke-test Run simple CLI check"
@echo " ocrd-test Run OCR-D CLI check"
@echo " test Run unit tests"
@@ -42,33 +49,29 @@ help:
@echo " PYTEST_ARGS pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests.) [$(PYTEST_ARGS)]"
@echo " SEG_MODEL URL of 'models' archive to download for segmentation 'test' [$(SEG_MODEL)]"
@echo " BIN_MODEL URL of 'models' archive to download for binarization 'test' [$(BIN_MODEL)]"
@echo " OCR_MODEL URL of 'models' archive to download for OCR 'test' [$(OCR_MODEL)]"
@echo ""
# END-EVAL
# Download and extract models to $(PWD)/models_layout_v0_5_0
models: models_layout_v0_5_0 models_ocr_v0_5_0 default-2021-03-09
models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
models_layout_v0_5_0: models_layout_v0_5_0.tar.gz
tar zxf models_layout_v0_5_0.tar.gz
models_layout_v0_5_0.tar.gz:
$(BIN_MODELFILE):
wget -O $@ $(BIN_MODEL)
$(SEG_MODELFILE):
wget -O $@ $(SEG_MODEL)
models_ocr_v0_5_0: models_ocr_v0_5_0.tar.gz
tar zxf models_ocr_v0_5_0.tar.gz
models_ocr_v0_5_0.tar.gz:
$(OCR_MODELFILE):
wget -O $@ $(OCR_MODEL)
default-2021-03-09: $(notdir $(BIN_MODEL))
unzip $(notdir $(BIN_MODEL))
$(BIN_MODELNAME): $(BIN_MODELFILE)
mkdir $@
mv $(basename $(notdir $(BIN_MODEL))) $@
$(notdir $(BIN_MODEL)):
wget $(BIN_MODEL)
unzip -d $@ $<
$(SEG_MODELNAME): $(SEG_MODELFILE)
tar zxf $<
$(OCR_MODELNAME): $(OCR_MODELFILE)
tar zxf $<
build:
$(PIP) install build
@@ -82,7 +85,10 @@ install:
install-dev:
$(PIP) install -e .$(and $(EXTRAS),[$(EXTRAS)])
deps-test: models_layout_v0_5_0
ifeq (OCR,$(findstring OCR, $(EXTRAS)))
deps-test: $(OCR_MODELNAME)
endif
deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
$(PIP) install -r requirements-test.txt
smoke-test: TMPDIR != mktemp -d
@@ -123,9 +129,9 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
$(RM) -r $(TMPDIR)
# Run unit tests
test: export MODELS_LAYOUT=$(CURDIR)/models_layout_v0_5_0
test: export MODELS_OCR=$(CURDIR)/models_ocr_v0_5_0
test: export MODELS_BIN=$(CURDIR)/default-2021-03-09
test: export MODELS_LAYOUT=$(CURDIR)/$(SEG_MODELNAME)
test: export MODELS_OCR=$(CURDIR)/$(OCR_MODELNAME)
test: export MODELS_BIN=$(CURDIR)/$(BIN_MODELNAME)
test:
$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)


@@ -1,4 +1,4 @@
pytest
pytest-subtests
pytest-isolate
coverage[toml]
black


@@ -5,5 +5,4 @@ scikit-learn >= 0.23.2
tensorflow < 2.13
numba <= 0.58.1
scikit-image
loky
biopython

File diff suppressed because it is too large


@@ -1,3 +1,5 @@
from typing import Tuple
from logging import getLogger
import time
import math
@@ -298,9 +300,17 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_end_with_child_without_mother,
new_main_sep_y)
def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
return (box[1], box[1] + box[3],
box[0], box[0] + box[2])
def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]:
return (slice(box[1], box[1] + box[3]),
slice(box[0], box[0] + box[2]))
def crop_image_inside_box(box, img_org_copy):
image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
image_box = img_org_copy[box2slice(box)]
return image_box, box2rect(box)
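
A quick worked example of the two new helpers: a `(x, y, width, height)` box maps to a `(y_min, y_max, x_min, x_max)` rectangle via `box2rect`, and to the equivalent NumPy slices via `box2slice`.

box = (2, 3, 4, 5)   # x=2, y=3, width=4, height=5
assert box2rect(box) == (3, 8, 2, 6)
assert box2slice(box) == (slice(3, 8), slice(2, 6))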
def otsu_copy_binary(img):
img_r = np.zeros((img.shape[0], img.shape[1], 3))
@@ -373,6 +383,10 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
return np.std(z)
def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
if not regions_without_separators.any():
return 0, []
#plt.imshow(regions_without_separators)
#plt.show()
regions_without_separators_0 = regions_without_separators.sum(axis=0)
##plt.plot(regions_without_separators_0)
##plt.show()
@@ -392,6 +406,9 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
zneg = gaussian_filter1d(zneg, sigma_)
peaks_neg, _ = find_peaks(zneg, height=0)
#plt.plot(zneg)
#plt.plot(peaks_neg, zneg[peaks_neg], 'rx')
#plt.show()
peaks, _ = find_peaks(z, height=0)
peaks_neg = peaks_neg - 10 - 10
@@ -406,9 +423,13 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
(peaks_neg < (regions_without_separators.shape[1] - 370))]
interest_pos = z[peaks]
interest_pos = interest_pos[interest_pos > 10]
if not interest_pos.any():
return 0, []
# plt.plot(z)
# plt.show()
interest_neg = z[peaks_neg]
if not interest_neg.any():
return 0, []
min_peaks_pos = np.min(interest_pos)
max_peaks_pos = np.max(interest_pos)
@@ -955,11 +976,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom,
regions_model_full.shape[0] // zoom),
interpolation=cv2.INTER_NEAREST)
contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent]
contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent]
###
cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
find_new_features_of_contours(contours_only_text_parent)
find_new_features_of_contours(contours_only_text_parent_z)
length_con=x_max_main-x_min_main
height_con=y_max_main-y_min_main
@@ -982,8 +1003,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
contours_only_text_parent_main_d=[]
contours_only_text_parent_head_d=[]
for ii in range(len(contours_only_text_parent)):
con=contours_only_text_parent[ii]
for ii, con in enumerate(contours_only_text_parent_z):
img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3))
img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))
@@ -992,25 +1012,30 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
(regions_model_full[:,:,0]==2)).sum()
pixels_main = all_pixels - pixels_header
if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ):
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
contours_only_text_parent_head.append(con)
if (( pixels_header / float(pixels_main) >= 0.6 and
length_con[ii] / float(height_con[ii]) >= 1.3 and
length_con[ii] / float(height_con[ii]) <= 3 ) or
( pixels_header / float(pixels_main) >= 0.3 and
length_con[ii] / float(height_con[ii]) >=3 )):
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 2
contours_only_text_parent_head.append(contours_only_text_parent[ii])
conf_contours_head.append(None) # why not conf_contours[ii], too?
if contours_only_text_parent_d_ordered is not None:
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
all_box_coord_head.append(all_box_coord[ii])
slopes_head.append(slopes[ii])
all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
conf_contours_head.append(None)
else:
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
contours_only_text_parent_main.append(con)
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 1
contours_only_text_parent_main.append(contours_only_text_parent[ii])
conf_contours_main.append(conf_contours[ii])
if contours_only_text_parent_d_ordered is not None:
contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
all_box_coord_main.append(all_box_coord[ii])
slopes_main.append(slopes[ii])
all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
#print(all_pixels,pixels_main,pixels_header)
### to make it faster
@@ -1018,8 +1043,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
# regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
# regions_model_full.shape[0] // zoom),
# interpolation=cv2.INTER_NEAREST)
contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
###
return (regions_model_1,
@@ -1626,12 +1649,19 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
def return_boxes_of_images_by_order_of_reading_new(
splitter_y_new, regions_without_separators,
matrix_of_lines_ch,
num_col_classifier, erosion_hurts, tables, right2left_readingorder):
num_col_classifier, erosion_hurts, tables,
right2left_readingorder,
logger=None):
if right2left_readingorder:
regions_without_separators = cv2.flip(regions_without_separators,1)
if logger is None:
logger = getLogger(__package__)
logger.debug('enter return_boxes_of_images_by_order_of_reading_new')
boxes=[]
peaks_neg_tot_tables = []
splitter_y_new = np.array(splitter_y_new, dtype=int)
for i in range(len(splitter_y_new)-1):
#print(splitter_y_new[i],splitter_y_new[i+1])
matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) &
@@ -1644,24 +1674,19 @@ def return_boxes_of_images_by_order_of_reading_new(
# 0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))):
if True:
try:
if erosion_hurts:
num_col, peaks_neg_fin = find_num_col(
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
num_col_classifier, tables, multiplier=6.)
else:
num_col, peaks_neg_fin = find_num_col(
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
num_col_classifier, tables, multiplier=7.)
num_col, peaks_neg_fin = find_num_col(
regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
num_col_classifier, tables, multiplier=6. if erosion_hurts else 7.)
except:
peaks_neg_fin=[]
num_col = 0
try:
peaks_neg_fin_org=np.copy(peaks_neg_fin)
if (len(peaks_neg_fin)+1)<num_col_classifier or num_col_classifier==6:
#print('burda')
peaks_neg_fin_org = np.copy(peaks_neg_fin)
if len(peaks_neg_fin)==0:
num_col, peaks_neg_fin = find_num_col(
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
num_col_classifier, tables, multiplier=3.)
peaks_neg_fin_early=[]
peaks_neg_fin_early.append(0)
@@ -1674,21 +1699,21 @@ def return_boxes_of_images_by_order_of_reading_new(
peaks_neg_fin_rev=[]
for i_n in range(len(peaks_neg_fin_early)-1):
#print(i_n,'i_n')
#plt.plot(regions_without_separators[int(splitter_y_new[i]):
# int(splitter_y_new[i+1]),
#plt.plot(regions_without_separators[splitter_y_new[i]:
# splitter_y_new[i+1],
# peaks_neg_fin_early[i_n]:
# peaks_neg_fin_early[i_n+1]].sum(axis=0) )
#plt.show()
try:
num_col, peaks_neg_fin1 = find_num_col(
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
num_col_classifier,tables, multiplier=7.)
except:
peaks_neg_fin1=[]
try:
num_col, peaks_neg_fin2 = find_num_col(
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
num_col_classifier,tables, multiplier=5.)
except:
@@ -1714,9 +1739,9 @@ def return_boxes_of_images_by_order_of_reading_new(
#print(peaks_neg_fin,'peaks_neg_fin')
except:
pass
logger.exception("cannot find peaks consistent with columns")
#num_col, peaks_neg_fin = find_num_col(
# regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
# regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],:],
# multiplier=7.0)
x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
@@ -1738,31 +1763,28 @@ def return_boxes_of_images_by_order_of_reading_new(
y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff)
x_starting = np.array(x_starting)
x_ending = np.array(x_ending)
y_type_2 = np.array(y_type_2)
y_diff_type_2 = np.array(y_diff_type_2)
all_columns = set(range(len(peaks_neg_tot) - 1))
if ((reading_order_type==1) or
(reading_order_type==0 and
(len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))):
try:
y_grenze=int(splitter_y_new[i])+300
y_grenze = splitter_y_new[i] + 300
#check if there is a big separator in this y_mains_sep_ohne_grenzen
args_early_ys=np.arange(len(y_type_2))
#print(args_early_ys,'args_early_ys')
#print(int(splitter_y_new[i]),int(splitter_y_new[i+1]))
#print(splitter_y_new[i], splitter_y_new[i+1])
x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) &
x_starting_up = x_starting[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) &
x_ending_up = x_ending[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) &
y_type_2_up = y_type_2[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
y_diff_type_2_up = y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) &
args_up = args_early_ys[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
if len(y_type_2_up) > 0:
y_main_separator_up = y_type_2_up [(x_starting_up==0) &
@@ -1776,8 +1798,8 @@ def return_boxes_of_images_by_order_of_reading_new(
args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
#print(args_to_be_kept,'args_to_be_kept')
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))])
splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0]
splitter_y_new[i], y_diff_main_separator_up.max()])
splitter_y_new[i] = y_diff_main_separator_up.max()
#print(splitter_y_new[i],'splitter_y_new[i]')
y_type_2 = y_type_2[args_to_be_kept]
@@ -1786,29 +1808,28 @@ def return_boxes_of_images_by_order_of_reading_new(
y_diff_type_2 = y_diff_type_2[args_to_be_kept]
#print('galdiha')
y_grenze=int(splitter_y_new[i])+200
y_grenze = splitter_y_new[i] + 200
args_early_ys2=np.arange(len(y_type_2))
y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) &
y_type_2_up=y_type_2[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) &
x_starting_up=x_starting[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) &
x_ending_up=x_ending[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
y_diff_type_2_up=y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) &
args_up2=args_early_ys2[(y_type_2 > splitter_y_new[i]) &
(y_type_2 <= y_grenze)]
#print(y_type_2_up,x_starting_up,x_ending_up,'didid')
nodes_in = []
nodes_in = set()
for ij in range(len(x_starting_up)):
nodes_in = nodes_in + list(range(int(x_starting_up[ij]),
int(x_ending_up[ij])))
nodes_in = np.unique(nodes_in)
nodes_in.update(range(x_starting_up[ij],
x_ending_up[ij]))
#print(nodes_in,'nodes_in')
if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
if nodes_in == set(range(len(peaks_neg_tot)-1)):
pass
elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)):
elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
pass
else:
#print('burdaydikh')
@@ -1823,17 +1844,16 @@ def return_boxes_of_images_by_order_of_reading_new(
pass
#print('burdaydikh2')
elif len(y_diff_main_separator_up)==0:
nodes_in = []
nodes_in = set()
for ij in range(len(x_starting_up)):
nodes_in = nodes_in + list(range(int(x_starting_up[ij]),
int(x_ending_up[ij])))
nodes_in = np.unique(nodes_in)
nodes_in.update(range(x_starting_up[ij],
x_ending_up[ij]))
#print(nodes_in,'nodes_in2')
#print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
if nodes_in == set(range(len(peaks_neg_tot)-1)):
pass
elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)):
elif nodes_in == set(range(1,len(peaks_neg_tot)-1)):
pass
else:
#print('burdaydikh')
@@ -1858,26 +1878,25 @@ def return_boxes_of_images_by_order_of_reading_new(
x_end_by_order=[]
if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1:
if reading_order_type==1:
y_lines_by_order.append(int(splitter_y_new[i]))
y_lines_by_order.append(splitter_y_new[i])
x_start_by_order.append(0)
x_end_by_order.append(len(peaks_neg_tot)-2)
else:
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
columns_covered_by_mothers = []
columns_covered_by_mothers = set()
for dj in range(len(x_start_without_mother)):
columns_covered_by_mothers = columns_covered_by_mothers + \
list(range(int(x_start_without_mother[dj]),
int(x_end_without_mother[dj])))
columns_covered_by_mothers = list(set(columns_covered_by_mothers))
all_columns=np.arange(len(peaks_neg_tot)-1)
columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
columns_covered_by_mothers.update(
range(x_start_without_mother[dj],
x_end_without_mother[dj]))
columns_not_covered = list(all_columns - columns_covered_by_mothers)
y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) +
len(x_start_without_mother),
dtype=int) * splitter_y_new[i])
##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered)
x_starting = np.append(x_starting, np.array(columns_not_covered, int))
x_starting = np.append(x_starting, x_start_without_mother)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
x_ending = np.append(x_ending, x_end_without_mother)
ind_args=np.arange(len(y_type_2))
@@ -1906,42 +1925,39 @@ def return_boxes_of_images_by_order_of_reading_new(
x_end_by_order.append(x_end_column_sort[ii]-1)
else:
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
columns_covered_by_mothers = []
columns_covered_by_mothers = set()
for dj in range(len(x_start_without_mother)):
columns_covered_by_mothers = columns_covered_by_mothers + \
list(range(int(x_start_without_mother[dj]),
int(x_end_without_mother[dj])))
columns_covered_by_mothers = list(set(columns_covered_by_mothers))
all_columns=np.arange(len(peaks_neg_tot)-1)
columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
columns_covered_by_mothers.update(
range(x_start_without_mother[dj],
x_end_without_mother[dj]))
columns_not_covered = list(all_columns - columns_covered_by_mothers)
y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother),
dtype=int) * splitter_y_new[i])
##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered)
x_starting = np.append(x_starting, np.array(columns_not_covered, int))
x_starting = np.append(x_starting, x_start_without_mother)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
x_ending = np.append(x_ending, x_end_without_mother)
columns_covered_by_with_child_no_mothers = []
columns_covered_by_with_child_no_mothers = set()
for dj in range(len(x_end_with_child_without_mother)):
columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \
list(range(int(x_start_with_child_without_mother[dj]),
int(x_end_with_child_without_mother[dj])))
columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers))
all_columns = np.arange(len(peaks_neg_tot)-1)
columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers))
columns_covered_by_with_child_no_mothers.update(
range(x_start_with_child_without_mother[dj],
x_end_with_child_without_mother[dj]))
columns_not_covered_child_no_mother = list(
all_columns - columns_covered_by_with_child_no_mothers)
#indexes_to_be_spanned=[]
for i_s in range(len(x_end_with_child_without_mother)):
columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother)
ind_args = np.arange(len(y_type_2))
x_end_with_child_without_mother = np.array(x_end_with_child_without_mother)
x_start_with_child_without_mother = np.array(x_start_with_child_without_mother)
x_end_with_child_without_mother = np.array(x_end_with_child_without_mother, int)
x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int)
for i_s_nc in columns_not_covered_child_no_mother:
if i_s_nc in x_start_with_child_without_mother:
x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
x_end_biggest_column = \
x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
args_all_biggest_lines = ind_args[(x_starting==i_s_nc) &
(x_ending==x_end_biggest_column)]
y_column_nc = y_type_2[args_all_biggest_lines]
@@ -1951,7 +1967,7 @@ def return_boxes_of_images_by_order_of_reading_new(
for i_c in range(len(y_column_nc)):
if i_c==(len(y_column_nc)-1):
ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) &
(y_type_2<int(splitter_y_new[i+1])) &
(y_type_2<splitter_y_new[i+1]) &
(x_starting>=i_s_nc) &
(x_ending<=x_end_biggest_column)]
else:
@@ -1967,21 +1983,19 @@ def return_boxes_of_images_by_order_of_reading_new(
if len(x_diff_all_between_nm_wc)>0:
biggest=np.argmax(x_diff_all_between_nm_wc)
columns_covered_by_mothers = []
columns_covered_by_mothers = set()
for dj in range(len(x_starting_all_between_nm_wc)):
columns_covered_by_mothers = columns_covered_by_mothers + \
list(range(int(x_starting_all_between_nm_wc[dj]),
int(x_ending_all_between_nm_wc[dj])))
columns_covered_by_mothers = list(set(columns_covered_by_mothers))
all_columns=np.arange(i_s_nc, x_end_biggest_column)
columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
columns_covered_by_mothers.update(
range(x_starting_all_between_nm_wc[dj],
x_ending_all_between_nm_wc[dj]))
child_columns = set(range(i_s_nc, x_end_biggest_column))
columns_not_covered = list(child_columns - columns_covered_by_mothers)
should_longest_line_be_extended=0
if (len(x_diff_all_between_nm_wc) > 0 and
set(list(range(int(x_starting_all_between_nm_wc[biggest]),
int(x_ending_all_between_nm_wc[biggest]))) +
list(columns_not_covered)) != set(all_columns)):
set(list(range(x_starting_all_between_nm_wc[biggest],
x_ending_all_between_nm_wc[biggest])) +
list(columns_not_covered)) != child_columns):
should_longest_line_be_extended=1
index_lines_so_close_to_top_separator = \
np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) &
@@ -1991,9 +2005,12 @@ def return_boxes_of_images_by_order_of_reading_new(
np.array(list(set(list(range(len(y_all_between_nm_wc)))) -
set(list(index_lines_so_close_to_top_separator))))
if len(indexes_remained_after_deleting_closed_lines) > 0:
y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
y_all_between_nm_wc = \
y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
x_starting_all_between_nm_wc = \
x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
x_ending_all_between_nm_wc = \
x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c])
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc)
@@ -2005,11 +2022,11 @@ def return_boxes_of_images_by_order_of_reading_new(
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
except:
pass
logger.exception("cannot append")
y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered)
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1)
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
for column in range(int(i_s_nc), int(x_end_biggest_column)):
@@ -2078,52 +2095,50 @@ def return_boxes_of_images_by_order_of_reading_new(
if len(y_in_cols)>0:
y_down=np.min(y_in_cols)
else:
y_down=[int(splitter_y_new[i+1])][0]
y_down=splitter_y_new[i+1]
#print(y_itself,'y_itself')
boxes.append([peaks_neg_tot[column],
peaks_neg_tot[column+1],
y_itself,
y_down])
except:
logger.exception("cannot assign boxes")
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
int(splitter_y_new[i]), int(splitter_y_new[i+1])])
splitter_y_new[i], splitter_y_new[i+1]])
else:
y_lines_by_order=[]
x_start_by_order=[]
x_end_by_order=[]
if len(x_starting)>0:
all_columns = np.arange(len(peaks_neg_tot)-1)
columns_covered_by_lines_covered_more_than_2col = []
columns_covered_by_lines_covered_more_than_2col = set()
for dj in range(len(x_starting)):
if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns):
pass
else:
columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \
list(range(int(x_starting[dj]),int(x_ending[dj]) ))
columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col))
columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col))
if set(range(x_starting[dj], x_ending[dj])) != all_columns:
columns_covered_by_lines_covered_more_than_2col.update(
range(x_starting[dj], x_ending[dj]))
columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col)
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + 1,
dtype=int) * splitter_y_new[i])
##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
if len(new_main_sep_y) > 0:
x_starting = np.append(x_starting, 0)
x_ending = np.append(x_ending, len(peaks_neg_tot)-1)
x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
else:
x_starting = np.append(x_starting, x_starting[0])
x_ending = np.append(x_ending, x_ending[0])
else:
all_columns = np.arange(len(peaks_neg_tot)-1)
columns_not_covered = list(set(all_columns))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
columns_not_covered = list(all_columns)
y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered),
dtype=int) * splitter_y_new[i])
##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
ind_args=np.array(range(len(y_type_2)))
ind_args = np.arange(len(y_type_2))
for column in range(len(peaks_neg_tot)-1):
#print(column,'column')
@@ -2155,7 +2170,7 @@ def return_boxes_of_images_by_order_of_reading_new(
x_start_itself=x_start_copy.pop(il)
x_end_itself=x_end_copy.pop(il)
for column in range(int(x_start_itself), int(x_end_itself)+1):
for column in range(x_start_itself, x_end_itself+1):
#print(column,'cols')
y_in_cols=[]
for yic in range(len(y_copy)):
@@ -2169,7 +2184,7 @@ def return_boxes_of_images_by_order_of_reading_new(
if len(y_in_cols)>0:
y_down=np.min(y_in_cols)
else:
y_down=[int(splitter_y_new[i+1])][0]
y_down=splitter_y_new[i+1]
#print(y_itself,'y_itself')
boxes.append([peaks_neg_tot[column],
peaks_neg_tot[column+1],
@@ -2191,9 +2206,10 @@ def return_boxes_of_images_by_order_of_reading_new(
x_end_new = regions_without_separators.shape[1] - boxes[i][0]
boxes[i][0] = x_start_new
boxes[i][1] = x_end_new
return boxes, peaks_neg_tot_tables_new
else:
return boxes, peaks_neg_tot_tables
peaks_neg_tot_tables = peaks_neg_tot_tables_new
logger.debug('exit return_boxes_of_images_by_order_of_reading_new')
return boxes, peaks_neg_tot_tables
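
To illustrate the right-to-left mirroring a few lines above: with page width `W` (hypothetical values below), a box spanning columns `[x0, x1)` maps to `[W - x1, W - x0)`.

W = 100            # regions_without_separators.shape[1]
x0, x1 = 10, 30    # original box extent
assert (W - x1, W - x0) == (70, 90)   # mirrored box extent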
def is_image_filename(fname: str) -> bool:
return fname.lower().endswith(('.jpg',


@@ -1,7 +1,15 @@
from typing import Sequence, Union
from numbers import Number
from functools import partial
import itertools
import cv2
import numpy as np
from shapely import geometry
from scipy.sparse.csgraph import minimum_spanning_tree
from shapely.geometry import Polygon, LineString
from shapely.geometry.polygon import orient
from shapely import set_precision
from shapely.ops import unary_union, nearest_points
from .rotate import rotate_image, rotation_image_new
@@ -37,29 +45,28 @@ def get_text_region_boxes_by_given_contours(contours):
return boxes, contours_new
def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area):
def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
found_polygons_early = []
for jv,c in enumerate(contours):
if len(c) < 3: # A polygon cannot have less than 3 points
for jv, contour in enumerate(contours):
if len(contour) < 3: # A polygon cannot have less than 3 points
continue
polygon = geometry.Polygon([point[0] for point in c])
polygon = contour2polygon(contour, dilate=dilate)
area = polygon.area
if (area >= min_area * np.prod(image.shape[:2]) and
area <= max_area * np.prod(image.shape[:2]) and
hierarchy[0][jv][3] == -1):
found_polygons_early.append(np.array([[point]
for point in polygon.exterior.coords], dtype=np.uint))
found_polygons_early.append(polygon2contour(polygon))
return found_polygons_early
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
found_polygons_early = []
for jv,c in enumerate(contours):
if len(c) < 3: # A polygon cannot have less than 3 points
for jv, contour in enumerate(contours):
if len(contour) < 3: # A polygon cannot have less than 3 points
continue
polygon = geometry.Polygon([point[0] for point in c])
# area = cv2.contourArea(c)
polygon = contour2polygon(contour, dilate=dilate)
# area = cv2.contourArea(contour)
area = polygon.area
##print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area
@@ -68,9 +75,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
area <= max_area * np.prod(image.shape[:2]) and
# hierarchy[0][jv][3]==-1
True):
# print(c[0][0][1])
found_polygons_early.append(np.array([[point]
for point in polygon.exterior.coords], dtype=np.int32))
# print(contour[0][0][1])
found_polygons_early.append(polygon2contour(polygon))
return found_polygons_early
def find_new_features_of_contours(contours_main):
@@ -135,12 +141,12 @@ def return_parent_contours(contours, hierarchy):
if hierarchy[0][i][3] == -1]
return contours_parent
def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
cnts_images = (region_pre_p[:, :, 0] == label) * 1
else:
cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -247,30 +253,26 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
return cont_int[0], index_r_con, confidence_contour
def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map):
def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
if not len(cnts):
return [], []
confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
#cnts = cnts/2
cnts = [(i/6).astype(int) for i in cnts]
results = map(partial(do_back_rotation_and_get_cnt_back,
img=img,
slope_first=slope_first,
confidence_matrix=confidence_matrix,
),
cnts, range(len(cnts)))
contours, indexes, conf_contours = tuple(zip(*results))
return [i*6 for i in contours], list(conf_contours)
def return_contours_of_interested_textline(region_pre_p, pixel):
confidence_matrix = cv2.resize(confidence_matrix,
(img.shape[1] // 6, img.shape[0] // 6),
interpolation=cv2.INTER_NEAREST)
confs = []
for cnt in cnts:
cnt_mask = np.zeros(confidence_matrix.shape)
cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0)
confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
return cnts, confs
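
The per-contour confidence above is a masked mean over the (downscaled) confidence map; a self-contained illustration with a hypothetical uniform 10×10 map:

import cv2
import numpy as np
confidence = np.full((10, 10), 0.5)
contour = np.array([[[1, 1]], [[8, 1]], [[8, 8]], [[1, 8]]], dtype=np.int32)
mask = cv2.fillPoly(np.zeros_like(confidence), pts=[contour], color=1.0)
print(np.sum(confidence * mask) / np.sum(mask))  # 0.5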
def return_contours_of_interested_textline(region_pre_p, label):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
cnts_images = (region_pre_p[:, :, 0] == label) * 1
else:
cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -293,12 +295,12 @@ def return_contours_of_image(image):
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return contours, hierarchy
def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
cnts_images = (region_pre_p[:, :, 0] == label) * 1
else:
cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -311,12 +313,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si
return contours_imgs
def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
cnts_images = (region_pre_p[:, :, 0] == label) * 1
else:
cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@@ -332,3 +334,97 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
return img_ret[:, :, 0]
def dilate_textline_contours(all_found_textline_polygons):
return [[polygon2contour(contour2polygon(contour, dilate=6))
for contour in region]
for region in all_found_textline_polygons]
def dilate_textregion_contours(all_found_textline_polygons):
return [polygon2contour(contour2polygon(contour, dilate=6))
for contour in all_found_textline_polygons]
def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
polygon = Polygon([point[0] for point in contour])
if dilate:
polygon = polygon.buffer(dilate)
if polygon.geom_type == 'GeometryCollection':
# heterogeneous result: filter zero-area shapes (LineString, Point)
polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0])
if polygon.geom_type == 'MultiPolygon':
# homogeneous result: construct convex hull to connect
polygon = join_polygons(polygon.geoms)
return make_valid(polygon)
def polygon2contour(polygon: Polygon) -> np.ndarray:
polygon = np.array(polygon.exterior.coords[:-1], dtype=int)
return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis]
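
A round-trip check for the two converters above; the `dilate` argument is the `Polygon.buffer()` mechanism that the changelog entry on invalid paths refers to. Negative coordinates produced by buffering get clipped to 0 on the way back:

import numpy as np
square = np.array([[[0, 0]], [[10, 0]], [[10, 10]], [[0, 10]]])
poly = contour2polygon(square, dilate=2)   # buffered outwards by 2 px
assert poly.is_valid
cnt = polygon2contour(poly)
assert cnt.ndim == 3 and cnt.shape[1:] == (1, 2)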
def make_valid(polygon: Polygon) -> Polygon:
"""Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
def isint(x):
return isinstance(x, int) or int(x) == x
# make sure rounding does not invalidate
if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0:
polygon = Polygon(np.round(polygon.exterior.coords))
points = list(polygon.exterior.coords[:-1])
# try by re-arranging points
for split in range(1, len(points)):
if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
break
# simplification may not be possible (at all) due to ordering
# in that case, try another starting point
polygon = Polygon(points[-split:]+points[:-split])
# try by simplification
for tolerance in range(int(polygon.area + 1.5)):
if polygon.is_valid:
break
# simplification may require a larger tolerance
polygon = polygon.simplify(tolerance + 1)
# try by enlarging
for tolerance in range(1, int(polygon.area + 2.5)):
if polygon.is_valid:
break
# enlargement may require a larger tolerance
polygon = polygon.buffer(tolerance)
assert polygon.is_valid, polygon.wkt
return polygon
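
For example (the repair path depends on the input; this one typically ends in the buffer stage), a self-intersecting "bow-tie" ring comes out valid:

from shapely.geometry import Polygon
bowtie = Polygon([(0, 0), (4, 4), (4, 0), (0, 4)])  # edges cross at (2, 2)
print(bowtie.is_valid)              # False
print(make_valid(bowtie).is_valid)  # True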
def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon:
"""construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
# ensure input polygons are simply typed and all oriented equally
polygons = [orient(poly)
for poly in itertools.chain.from_iterable(
[poly.geoms
if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
else [poly]
for poly in polygons])]
npoly = len(polygons)
if npoly == 1:
return polygons[0]
# find min-dist path through all polygons (travelling salesman)
pairs = itertools.combinations(range(npoly), 2)
dists = np.zeros((npoly, npoly), dtype=float)
for i, j in pairs:
dist = polygons[i].distance(polygons[j])
if dist < 1e-5:
dist = 1e-5 # if pair merely touches, we still need to get an edge
dists[i, j] = dist
dists[j, i] = dist
dists = minimum_spanning_tree(dists, overwrite=True)
# add bridge polygons (where necessary)
for prevp, nextp in zip(*dists.nonzero()):
prevp = polygons[prevp]
nextp = polygons[nextp]
nearest = nearest_points(prevp, nextp)
bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1)
polygons.append(bridgep)
jointp = unary_union(polygons)
assert jointp.geom_type == 'Polygon', jointp.wkt
# follow-up calculations will necessarily be integer;
# so anticipate rounding here and then ensure validity
jointp2 = set_precision(jointp, 1.0)
if jointp2.geom_type != 'Polygon' or not jointp2.is_valid:
jointp2 = Polygon(np.round(jointp.exterior.coords))
jointp2 = make_valid(jointp2)
assert jointp2.geom_type == 'Polygon', jointp2.wkt
return jointp2
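
Usage sketch for `join_polygons`: two disjoint squares get connected by a bridge along their nearest points, yielding a single polygon that covers both inputs plus the bridge.

from shapely.geometry import Polygon
a = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
b = Polygon([(30, 0), (40, 0), (40, 10), (30, 10)])
joined = join_polygons([a, b], scale=20)
print(joined.geom_type)               # Polygon
print(joined.area > a.area + b.area)  # True (bridge included)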


@@ -99,6 +99,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
except:
point_left=first_nonzero
if point_left == first_nonzero and point_right == last_nonzero:
return text_regions
if point_right>=mask_marginals.shape[1]:


@@ -17,9 +17,12 @@ from .contour import (
return_contours_of_interested_textline,
find_contours_mean_y_diff,
)
from .shm import share_ndarray, wrap_ndarray_shared
from . import (
find_num_col_deskew,
crop_image_inside_box,
box2rect,
box2slice,
)
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@@ -64,7 +67,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -75,11 +79,14 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
clusters_to_be_deleted = []
if len(arg_diff_cluster) > 0:
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
for i in range(len(arg_diff_cluster) - 1):
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
arg_diff_cluster[i + 1] + 1])
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
arg_diff_cluster[i + 1] + 1])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
if len(clusters_to_be_deleted) > 0:
peaks_new_extra = []
for m in range(len(clusters_to_be_deleted)):
@@ -176,7 +183,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -236,7 +244,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
try:
neg_peaks_max=np.max(y_padded_smoothed[peaks])
arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42]
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42]
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -313,23 +322,36 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_down =y_max_cont-1
##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_down =y_max_cont-1
##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
@@ -338,7 +360,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_down_narrow = img_patch.shape[0] - 2
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))]
distances = np.array(distances)
@@ -465,7 +489,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))]
distances = np.array(distances)
@@ -540,7 +565,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down)
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))]
distances = np.array(distances)
@@ -610,7 +636,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg])
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -686,30 +713,50 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)
##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1
##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)
##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1
##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
#point_up
# np.max(y_cont)
#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
if point_down_narrow >= img_patch.shape[0]:
point_down_narrow = img_patch.shape[0] - 2
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))]
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))]
distances = np.array(distances)
xvinside = xv[distances >= 0]
@@ -798,7 +845,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next)
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))]
distances = np.array(distances)
@@ -863,7 +911,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down)
distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))]
distances = np.array(distances)
@@ -947,7 +996,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -960,8 +1010,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
if len(arg_diff_cluster) > 0:
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
for i in range(len(arg_diff_cluster) - 1):
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1])
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:
arg_diff_cluster[i + 1] + 1])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
if len(clusters_to_be_deleted) > 0:
peaks_new_extra = []
for m in range(len(clusters_to_be_deleted)):
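The loop above splits the indices of spurious negative peaks into runs of consecutive values by inspecting np.diff gaps. For illustration only (not part of this change), the same grouping can be written more compactly with np.split:

import numpy as np

idxs = np.array([2, 3, 4, 9, 10, 15])    # indices of peaks marked for deletion
breaks = np.where(np.diff(idxs) > 1)[0]  # positions where one run ends
runs = np.split(idxs, breaks + 1)
# runs == [array([2, 3, 4]), array([9, 10]), array([15])]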
@@ -1011,7 +1064,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
try:
neg_peaks_max = np.max(y_padded_smoothed[peaks])
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@@ -1287,7 +1341,9 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
return None, cont_final
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
def textline_contours_postprocessing(textline_mask, slope,
contour_text_interest, box_ind,
add_boxes_coor_into_textlines=False):
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
textline_mask = textline_mask.astype(np.uint8)
kernel = np.ones((5, 5), np.uint8)
@@ -1347,24 +1403,26 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
return contours_rotated_clean
def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None):
def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None):
if logger is None:
logger = getLogger(__package__)
if not np.prod(img_crop.shape):
return img_crop
if num_col == 1:
num_patches = int(img_path.shape[1] / 200.0)
num_patches = int(img_crop.shape[1] / 200.0)
else:
num_patches = int(img_path.shape[1] / 140.0)
# num_patches=int(img_path.shape[1]/200.)
num_patches = int(img_crop.shape[1] / 140.0)
# num_patches=int(img_crop.shape[1]/200.)
if num_patches == 0:
num_patches = 1
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
# plt.imshow(img_patch_ineterst)
# plt.imshow(img_patch_interest)
# plt.show()
length_x = int(img_path.shape[1] / float(num_patches))
length_x = int(img_crop.shape[1] / float(num_patches))
# margin = int(0.04 * length_x) just recently this was changed because it breaks lines into 2
margin = int(0.04 * length_x)
# if margin<=4:
@@ -1372,7 +1430,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
# margin=0
width_mid = length_x - 2 * margin
nxf = img_path.shape[1] / float(width_mid)
nxf = img_crop.shape[1] / float(width_mid)
if nxf > int(nxf):
nxf = int(nxf) + 1
@@ -1388,12 +1446,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
index_x_d = i * width_mid
index_x_u = index_x_d + length_x
if index_x_u > img_path.shape[1]:
index_x_u = img_path.shape[1]
index_x_d = img_path.shape[1] - length_x
if index_x_u > img_crop.shape[1]:
index_x_u = img_crop.shape[1]
index_x_d = img_crop.shape[1] - length_x
# img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
img_xline = img_patch_interest[:, index_x_d:index_x_u]
try:
assert img_xline.any()
@@ -1409,9 +1467,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
img_line_rotated = rotate_image(img_xline, slope_xline)
img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape)
img_patch_interest_revised = np.zeros(img_patch_interest.shape)
for i in range(nxf):
if i == 0:
@@ -1421,11 +1479,11 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
index_x_d = i * width_mid
index_x_u = index_x_d + length_x
if index_x_u > img_path.shape[1]:
index_x_u = img_path.shape[1]
index_x_d = img_path.shape[1] - length_x
if index_x_u > img_crop.shape[1]:
index_x_u = img_crop.shape[1]
index_x_d = img_crop.shape[1] - length_x
img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
img_xline = img_patch_interest[:, index_x_d:index_x_u]
img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0]
@@ -1448,11 +1506,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]]
img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
return img_patch_ineterst_revised
return img_patch_interest_revised
def do_image_rotation(angle, img, sigma_des, logger=None):
@wrap_ndarray_shared(kw='img')
def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None):
if logger is None:
logger = getLogger(__package__)
img_rot = rotate_image(img, angle)
@@ -1465,7 +1524,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None):
return var
def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
main_page=False, logger=None, plotter=None, map=map):
main_page=False, logger=None, plotter=None, map=None):
if main_page and plotter:
plotter.save_plot_of_textline_density(img_patch_org)
@@ -1479,159 +1538,75 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
#img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
#img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
#img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0],
# int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
angles = np.array([-45, 0, 45, 90,])
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
elif main_page:
angles = np.array (list(np.linspace(-12, -7, int(n_tot_angles/4))) + list(np.linspace(-6, 6, n_tot_angles- 2* int(n_tot_angles/4))) + list(np.linspace(7, 12, int(n_tot_angles/4))))#np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
#angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4),
np.linspace(-6, 6, n_tot_angles // 2),
np.linspace(7, 12, n_tot_angles // 4)))
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
early_slope_edge=11
if abs(angle) > early_slope_edge:
if angle < 0:
angles = np.linspace(-90, -12, n_tot_angles)
angles2 = np.linspace(-90, -12, n_tot_angles)
else:
angles = np.linspace(90, 12, n_tot_angles)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angles2 = np.linspace(90, 12, n_tot_angles)
angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
if var2 > var:
angle = angle2
else:
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
early_slope_edge=22
if abs(angle) > early_slope_edge:
if angle < 0:
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
else:
angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
if var2 > var:
angle = angle2
return angle
def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map):
if logger is None:
logger = getLogger(__package__)
results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles))
if map is None:
results = [do_image_rotation.__wrapped__(angle, img=img, sigma_des=sigma_des, logger=logger)
for angle in angles]
else:
with share_ndarray(img) as img_shared:
results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=None),
angles))
if plotter:
plotter.save_plot_of_rotation_angle(angles, results)
try:
var_res = np.array(results)
assert var_res.any()
angle = angles[np.argmax(var_res)]
idx = np.argmax(var_res)
angle = angles[idx]
var = var_res[idx]
except:
logger.exception("cannot determine best angle among %s", str(angles))
angle = 0
return angle
def return_deskew_slop_old_mp(img_patch_org, sigma_des,n_tot_angles=100,
main_page=False, logger=None, plotter=None):
if main_page and plotter:
plotter.save_plot_of_textline_density(img_patch_org)
img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
max_shape=np.max(img_int.shape)
img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) ))
onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.)
onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
angles = np.array([-45, 0, 45, 90,])
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
elif main_page:
angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
early_slope_edge=11
if abs(angle) > early_slope_edge:
if angle < 0:
angles = np.linspace(-90, -12, n_tot_angles)
else:
angles = np.linspace(90, 12, n_tot_angles)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
else:
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
early_slope_edge=22
if abs(angle) > early_slope_edge:
if angle < 0:
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
else:
angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
return angle
def do_image_rotation_omp(queue_of_all_params,angles_per_process, img_resized, sigma_des):
vars_per_each_subprocess = []
angles_per_each_subprocess = []
for mv in range(len(angles_per_process)):
img_rot=rotate_image(img_resized,angles_per_process[mv])
img_rot[img_rot!=0]=1
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
except:
var_spectrum=0
vars_per_each_subprocess.append(var_spectrum)
angles_per_each_subprocess.append(angles_per_process[mv])
queue_of_all_params.put([vars_per_each_subprocess, angles_per_each_subprocess])
def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None):
num_cores = cpu_count()
queue_of_all_params = Queue()
processes = []
nh = np.linspace(0, len(angles), num_cores + 1)
for i in range(num_cores):
angles_per_process = angles[int(nh[i]) : int(nh[i + 1])]
processes.append(Process(target=do_image_rotation_omp, args=(queue_of_all_params, angles_per_process, img_resized, sigma_des)))
for i in range(num_cores):
processes[i].start()
var_res=[]
all_angles = []
for i in range(num_cores):
list_all_par = queue_of_all_params.get(True)
vars_for_subprocess = list_all_par[0]
angles_sub_process = list_all_par[1]
for j in range(len(vars_for_subprocess)):
var_res.append(vars_for_subprocess[j])
all_angles.append(angles_sub_process[j])
for i in range(num_cores):
processes[i].join()
if plotter:
plotter.save_plot_of_rotation_angle(all_angles, var_res)
try:
var_res=np.array(var_res)
ang_int=all_angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
return ang_int
var = 0
return angle, var
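With this change, parallelism in return_deskew_slop is opt-in through the map argument: None keeps the per-angle scoring in-process (calling the undecorated do_image_rotation.__wrapped__), while a process-pool map plus the shared-memory wrapper fans the candidate angles out to workers. A hedged usage sketch; the pool size, sigma_des value and img_patch input are arbitrary placeholders, not values from this diff:

from concurrent.futures import ProcessPoolExecutor

# sequential: every candidate angle is rotated and scored in this process
angle = return_deskew_slop(img_patch, sigma_des=2.0, map=None)

# parallel: the image goes into shared memory once, workers score the angles
with ProcessPoolExecutor(max_workers=4) as pool:
    angle = return_deskew_slop(img_patch, sigma_des=2.0, map=pool.map)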
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new(
box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, slope_deskew,
textline_mask_tot_ea=None, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
):
if KERNEL is None:
@@ -1641,7 +1616,7 @@ def do_work_of_slopes_new(
logger.debug('enter do_work_of_slopes_new')
x, y, w, h = box_text
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
crop_coor = box2rect(box_text)
mask_textline = np.zeros(textline_mask_tot_ea.shape)
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
all_text_region_raw = textline_mask_tot_ea * mask_textline
@@ -1649,7 +1624,7 @@ def do_work_of_slopes_new(
img_int_p = all_text_region_raw[:,:]
img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2)
if img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
slope = 0
slope_for_all = slope_deskew
all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w]
@@ -1687,9 +1662,12 @@ def do_work_of_slopes_new(
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
@wrap_ndarray_shared(kw='mask_texts_only')
def do_work_of_slopes_new_curved(
box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew,
textline_mask_tot_ea=None, mask_texts_only=None,
num_col=1, scale_par=1.0, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
):
if KERNEL is None:
@@ -1706,7 +1684,7 @@ def do_work_of_slopes_new_curved(
# plt.imshow(img_int_p)
# plt.show()
if img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
slope = 0
slope_for_all = slope_deskew
else:
@@ -1732,7 +1710,7 @@ def do_work_of_slopes_new_curved(
slope_for_all = slope_deskew
slope = slope_for_all
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
crop_coor = box2rect(box_text)
if abs(slope_for_all) < 45:
textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
@@ -1765,20 +1743,25 @@ def do_work_of_slopes_new_curved(
mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4)
pixel_img = 1
mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par))
mask_biggest2 = resize_image(mask_biggest2,
int(mask_biggest2.shape[0] * scale_par),
int(mask_biggest2.shape[1] * scale_par))
cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img)
try:
textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0])
except Exception as why:
logger.error(why)
else:
textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True)
textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw,
slope_for_all, contour_par,
box_text, True)
return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new_light(
box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light,
textline_mask_tot_ea=None, slope_deskew=0, textline_light=True,
logger=None
):
if logger is None:
@@ -1786,7 +1769,7 @@ def do_work_of_slopes_new_light(
logger.debug('enter do_work_of_slopes_new_light')
x, y, w, h = box_text
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
crop_coor = box2rect(box_text)
mask_textline = np.zeros(textline_mask_tot_ea.shape)
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
all_text_region_raw = textline_mask_tot_ea * mask_textline

45 src/eynollah/utils/shm.py Normal file
View file

@@ -0,0 +1,45 @@
from multiprocessing import shared_memory
from contextlib import contextmanager
from functools import wraps
import numpy as np
@contextmanager
def share_ndarray(array: np.ndarray):
size = np.dtype(array.dtype).itemsize * np.prod(array.shape)
shm = shared_memory.SharedMemory(create=True, size=size)
try:
shared_array = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
shared_array[:] = array[:]
shared_array.flags["WRITEABLE"] = False
yield dict(shape=array.shape, dtype=array.dtype, name=shm.name)
finally:
shm.close()
shm.unlink()
@contextmanager
def ndarray_shared(array: dict):
shm = shared_memory.SharedMemory(name=array['name'])
try:
array = np.ndarray(array['shape'], dtype=array['dtype'], buffer=shm.buf)
yield array
finally:
shm.close()
def wrap_ndarray_shared(kw=None):
def wrapper(f):
if kw is None:
@wraps(f)
def shared_func(array, *args, **kwargs):
with ndarray_shared(array) as ndarray:
return f(ndarray, *args, **kwargs)
return shared_func
else:
@wraps(f)
def shared_func(*args, **kwargs):
array = kwargs.pop(kw)
with ndarray_shared(array) as ndarray:
kwargs[kw] = ndarray
return f(*args, **kwargs)
return shared_func
return wrapper
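The two halves of this helper are designed to be used across a process boundary: the parent wraps its array with share_ndarray and passes the resulting descriptor dict to the pool, while the worker function, decorated with wrap_ndarray_shared, transparently reattaches to the same buffer. A minimal end-to-end sketch; the worker function and pool setup are illustrative, not part of this file:

from concurrent.futures import ProcessPoolExecutor
from functools import partial
import numpy as np
from eynollah.utils.shm import share_ndarray, wrap_ndarray_shared

@wrap_ndarray_shared(kw='img')
def column_sum(col, img=None):
    # 'img' arrives as a {shape, dtype, name} descriptor and is reattached here
    return float(img[:, col].sum())

if __name__ == '__main__':
    data = np.random.rand(500, 500)
    with share_ndarray(data) as data_shared, ProcessPoolExecutor(4) as pool:
        sums = list(pool.map(partial(column_sum, img=data_shared), range(data.shape[1])))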

View file

@@ -92,6 +92,7 @@ def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(t
return peaks_final
else:
return None
# Function to fit text inside the given area
def fit_text_single_line(draw, text, font_path, max_width, max_height):
initial_font_size = 50
@@ -369,7 +370,11 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind
return textline_contour
def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False):
def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons,
prediction_model,
b_s_ocr, num_to_char,
textline_light=False,
curved_line=False):
max_len = 512
padding_token = 299
image_width = 512#max_len * 4
@@ -425,17 +430,23 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
if splited_images:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0],
image_height,
image_width)
cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(1)
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1],
image_height,
image_width)
cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(-1)
else:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop,
image_height,
image_width)
cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(0)
@@ -468,7 +479,12 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
extracted_texts.append(pred_texts_ib)
extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
extracted_texts_merged = [extracted_texts[ind]
if cropped_lines_meging_indexing[ind]==0
else extracted_texts[ind]+" "+extracted_texts[ind+1]
if cropped_lines_meging_indexing[ind]==1
else None
for ind in range(len(cropped_lines_meging_indexing))]
extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
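The cropped_lines_meging_indexing values drive the merge above: 0 marks a line kept as-is, 1 the first half of a split line (joined with its successor), and -1 the second half, which is dropped after being consumed. A toy run of the same comprehension with made-up strings:

texts = ["erste", "zwei-", "te"]
merging = [0, 1, -1]
merged = [texts[i] if merging[i] == 0
          else texts[i] + " " + texts[i + 1] if merging[i] == 1
          else None
          for i in range(len(merging))]
merged = [t for t in merged if t is not None]
assert merged == ["erste", "zwei- te"]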

View file

@@ -289,7 +289,7 @@ class EynollahXmlWriter():
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
for mm in range(len(found_polygons_text_region_h)):
textregion = TextRegionType(id=counter.next_region_id, type_='header',
textregion = TextRegionType(id=counter.next_region_id, type_='heading',
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
page.add_TextRegion(textregion)
@@ -335,7 +335,7 @@ class EynollahXmlWriter():
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
for mm in range(len(polygons_lines_to_be_written_in_xml)):
page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
for mm in range(len(found_polygons_tables)):
page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))

View file

@@ -20,23 +20,9 @@ MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
args = [
'-m', MODELS_LAYOUT,
'-i', str(infile),
'-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO)
def only_eynollah(logrec):
return logrec.name == 'eynollah'
runner = CliRunner()
for options in [
@pytest.mark.parametrize(
"options",
[
[], # defaults
["--allow_scaling", "--curved-line"],
["--allow_scaling", "--curved-line", "--full-layout"],
@@ -47,22 +33,34 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
# -eoi ...
# --do_ocr
# --skip_layout_and_reading_order
]:
with subtests.test(#msg="test CLI",
options=options):
with caplog.filtering(only_eynollah):
result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
assert str(infile) in logmsgs
assert outfile.exists()
tree = page_from_file(str(outfile)).etree
regions = tree.xpath("//page:TextRegion", namespaces=NS)
assert len(regions) >= 2, "result is inaccurate"
regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
assert len(regions) >= 2, "result is inaccurate"
lines = tree.xpath("//page:TextLine", namespaces=NS)
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
], ids=str)
def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
args = [
'-m', MODELS_LAYOUT,
'-i', str(infile),
'-o', str(outfile.parent),
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO)
def only_eynollah(logrec):
return logrec.name == 'eynollah'
runner = CliRunner()
with caplog.filtering(only_eynollah):
result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
assert str(infile) in logmsgs
assert outfile.exists()
tree = page_from_file(str(outfile)).etree
regions = tree.xpath("//page:TextRegion", namespaces=NS)
assert len(regions) >= 2, "result is inaccurate"
regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
assert len(regions) >= 2, "result is inaccurate"
lines = tree.xpath("//page:TextLine", namespaces=NS)
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources')
@@ -86,7 +84,13 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in'))
assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog):
@pytest.mark.parametrize(
"options",
[
[], # defaults
["--no-patches"],
], ids=str)
def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
args = [
@@ -100,25 +104,19 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
def only_eynollah(logrec):
return logrec.name == 'SbbBinarizer'
runner = CliRunner()
for options in [
[], # defaults
["--no-patches"],
]:
with subtests.test(#msg="test CLI",
options=options):
with caplog.filtering(only_eynollah):
result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
assert outfile.exists()
with Image.open(infile) as original_img:
original_size = original_img.size
with Image.open(outfile) as binarized_img:
binarized_size = binarized_img.size
assert original_size == binarized_size
with caplog.filtering(only_eynollah):
result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
assert outfile.exists()
with Image.open(infile) as original_img:
original_size = original_img.size
with Image.open(outfile) as binarized_img:
binarized_size = binarized_img.size
assert original_size == binarized_size
def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog):
def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources')
outdir = tmp_path
args = [
@@ -139,15 +137,19 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2
assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog):
@pytest.mark.parametrize(
"options",
[
[], # defaults
["-sos"],
], ids=str)
def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
args = [
'-m', MODELS_LAYOUT,
'-i', str(infile),
'-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
@@ -155,25 +157,19 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap
def only_eynollah(logrec):
return logrec.name == 'enhancement'
runner = CliRunner()
for options in [
[], # defaults
["-sos"],
]:
with subtests.test(#msg="test CLI",
options=options):
with caplog.filtering(only_eynollah):
result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
assert outfile.exists()
with Image.open(infile) as original_img:
original_size = original_img.size
with Image.open(outfile) as enhanced_img:
enhanced_size = enhanced_img.size
assert (original_size == enhanced_size) == ("-sos" in options)
with caplog.filtering(only_eynollah):
result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
assert outfile.exists()
with Image.open(infile) as original_img:
original_size = original_img.size
with Image.open(outfile) as enhanced_img:
enhanced_size = enhanced_img.size
assert (original_size == enhanced_size) == ("-sos" in options)
def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources')
outdir = tmp_path
args = [
@@ -194,7 +190,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, ca
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2
assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
args = [
@@ -223,7 +219,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplo
#assert in_order != out_order
assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']
def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources')
outdir = tmp_path
args = [
@@ -245,7 +241,15 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl
#assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
@pytest.mark.parametrize(
"options",
[
[], # defaults
["-doit", #str(outrenderfile.parent)],
],
["-trocr"],
], ids=str)
def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
@@ -255,8 +259,6 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
'-i', str(infile),
'-dx', str(infile.parent),
'-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
@@ -264,33 +266,25 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
def only_eynollah(logrec):
return logrec.name == 'eynollah'
runner = CliRunner()
for options in [
# kba Fri Sep 26 12:53:49 CEST 2025
# Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
# [], # defaults
# ["-doit", str(outrenderfile.parent)],
["-trocr"],
]:
with subtests.test(#msg="test CLI",
options=options):
with caplog.filtering(only_eynollah):
result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
# FIXME: ocr has no logging!
#assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
assert outfile.exists()
if "-doit" in options:
assert outrenderfile.exists()
#in_tree = page_from_file(str(infile)).etree
#in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
out_tree = page_from_file(str(outfile)).etree
out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
if "-doit" in options:
options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
with caplog.filtering(only_eynollah):
result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
# FIXME: ocr has no logging!
#assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
assert outfile.exists()
if "-doit" in options:
assert outrenderfile.exists()
#in_tree = page_from_file(str(infile)).etree
#in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
out_tree = page_from_file(str(outfile)).etree
out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
@pytest.mark.skip("Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged")
def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources')
outdir = tmp_path
args = [