Merge remote-tracking branch 'bertsky/loky-with-shm-for-175-rebuilt' into prepare-v0.6.0

Author: kba
Date:   2025-10-01 20:27:56 +02:00
Commit: 96eb1c11e6

16 changed files with 1558 additions and 1312 deletions


@@ -24,24 +24,39 @@ jobs:
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
         df -h
     - uses: actions/checkout@v4
-    - uses: actions/cache@v4
+    - uses: actions/cache/restore@v4
       id: seg_model_cache
       with:
        path: models_layout_v0_5_0
-       key: ${{ runner.os }}-models
+       key: seg-models
-    - uses: actions/cache@v4
+    - uses: actions/cache/restore@v4
      id: ocr_model_cache
      with:
-       path: models_ocr_v0_5_0
-       key: ${{ runner.os }}-models
+       path: models_ocr_v0_5_1
+       key: ocr-models
-    - uses: actions/cache@v4
+    - uses: actions/cache/restore@v4
      id: bin_model_cache
      with:
        path: default-2021-03-09
-       key: ${{ runner.os }}-modelbin
+       key: bin-models
     - name: Download models
       if: steps.seg_model_cache.outputs.cache-hit != 'true' || steps.bin_model_cache.outputs.cache-hit != 'true' || steps.ocr_model_cache.outputs.cache-hit != true
       run: make models
+    - uses: actions/cache/save@v4
+      if: steps.seg_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: models_layout_v0_5_0
+        key: seg-models
+    - uses: actions/cache/save@v4
+      if: steps.ocr_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: models_ocr_v0_5_1
+        key: ocr-models
+    - uses: actions/cache/save@v4
+      if: steps.bin_model_cache.outputs.cache-hit != 'true'
+      with:
+        path: default-2021-03-09
+        key: bin-models
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v5
       with:

.gitignore

@@ -2,7 +2,11 @@
 __pycache__
 sbb_newspapers_org_image/pylint.log
 models_eynollah*
+models_ocr*
+models_layout*
+default-2021-03-09
 output.html
 /build
 /dist
 *.tif
+TAGS


@@ -5,6 +5,33 @@ Versioned according to [Semantic Versioning](http://semver.org/).

 ## Unreleased

+Fixed:
+
+ * :fire: polygons: avoid invalid paths (use `Polygon.buffer()` instead of dilation etc.)
+ * `return_boxes_of_images_by_order_of_reading_new`: avoid Numpy.dtype mismatch, simplify
+ * `return_boxes_of_images_by_order_of_reading_new`: log any exceptions instead of ignoring them
+ * `filter_contours_without_textline_inside`: avoid removing from duplicate lists twice
+ * `get_marginals`: exit early if no peaks are found, to avoid a spurious overlap mask
+ * `get_smallest_skew`: after shifting the search range of the rotation angle, use the overall best result
+ * Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR)
+ * OCR: re-instate missing methods and fix `utils_ocr` function calls
+ * :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`) (f458e3e)
+ * tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate`
+   (so CUDA memory gets freed between tests if running on GPU)
+
+Changed:
+
+ * polygons: slightly widen for regions and lines, increase for separators
+ * various refactorings, some code style and identifier improvements
+ * deskewing/multiprocessing: switch back to `ProcessPoolExecutor` (faster),
+   but use shared memory if necessary, and switch back from `loky` to stdlib,
+   and shutdown in `del()` instead of `atexit` (see the sketch after this excerpt)
+ * :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too
+ * :fire: writer: use `@type='heading'` instead of `'header'` for headings
+ * CI: update and improve model caching
+
 ## [0.5.0] - 2025-09-26

 Fixed:
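The deskewing/multiprocessing entry only names the approach. Below is a minimal, self-contained sketch of that pattern (illustrative names, not the actual eynollah implementation): a stdlib ProcessPoolExecutor whose workers attach to a shared-memory array instead of pickling it, with shutdown performed in __del__ rather than via atexit.

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import shared_memory
import numpy as np

def _row_sum(shm_name, shape, dtype, row):
    # worker: attach to the existing shared block instead of receiving the whole array
    shm = shared_memory.SharedMemory(name=shm_name)
    try:
        arr = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
        return float(arr[row].sum())
    finally:
        shm.close()

class SharedArrayPool:
    def __init__(self, image):
        self.shm = shared_memory.SharedMemory(create=True, size=image.nbytes)
        self.arr = np.ndarray(image.shape, dtype=image.dtype, buffer=self.shm.buf)
        self.arr[:] = image
        self.executor = ProcessPoolExecutor(max_workers=2)

    def row_sums(self):
        futures = [self.executor.submit(_row_sum, self.shm.name,
                                        self.arr.shape, self.arr.dtype.name, r)
                   for r in range(self.arr.shape[0])]
        return [f.result() for f in futures]

    def __del__(self):
        # shut down in __del__ (not atexit), then release the shared block
        self.executor.shutdown(wait=False)
        self.arr = None          # drop the buffer reference before closing
        self.shm.close()
        self.shm.unlink()

if __name__ == '__main__':
    pool = SharedArrayPool(np.ones((4, 1000), dtype=np.uint8))
    print(pool.row_sums())       # [1000.0, 1000.0, 1000.0, 1000.0]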


@@ -40,6 +40,8 @@ RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename
 RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json
 # install everything and reduce image size
 RUN make install EXTRAS=OCR && rm -rf /build/eynollah
+# fixup for broken cuDNN installation (Torch pulls in 8.5.0, which is incompatible with Tensorflow)
+RUN pip install nvidia-cudnn-cu11==8.6.0.163
 # smoke test
 RUN eynollah --help


@@ -13,12 +13,18 @@ DOCKER ?= docker
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
 SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
+SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
+SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)

 BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
+BIN_MODELFILE = $(notdir $(BIN_MODEL))
+BIN_MODELNAME := default-2021-03-09

-OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1
+OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
+OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
+OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)

-PYTEST_ARGS ?= -vv
+PYTEST_ARGS ?= -vv --isolate

 # BEGIN-EVAL makefile-parser --make-help Makefile

@@ -31,7 +37,8 @@ help:
 	@echo "    install        Install package with pip"
 	@echo "    install-dev    Install editable with pip"
 	@echo "    deps-test      Install test dependencies with pip"
-	@echo "    models         Download and extract models to $(CURDIR)/models_layout_v0_5_0"
+	@echo "    models         Download and extract models to $(CURDIR):"
+	@echo "                   $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)"
 	@echo "    smoke-test     Run simple CLI check"
 	@echo "    ocrd-test      Run OCR-D CLI check"
 	@echo "    test           Run unit tests"

@@ -42,33 +49,29 @@ help:
 	@echo "    PYTEST_ARGS    pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests. [$(PYTEST_ARGS)]"
 	@echo "    SEG_MODEL      URL of 'models' archive to download for segmentation 'test' [$(SEG_MODEL)]"
 	@echo "    BIN_MODEL      URL of 'models' archive to download for binarization 'test' [$(BIN_MODEL)]"
+	@echo "    OCR_MODEL      URL of 'models' archive to download for binarization 'test' [$(OCR_MODEL)]"
 	@echo ""

 # END-EVAL

 # Download and extract models to $(PWD)/models_layout_v0_5_0
-models: models_layout_v0_5_0 models_ocr_v0_5_0 default-2021-03-09
+models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)

-models_layout_v0_5_0: models_layout_v0_5_0.tar.gz
-	tar zxf models_layout_v0_5_0.tar.gz
-
-models_layout_v0_5_0.tar.gz:
+$(BIN_MODELFILE):
+	wget -O $@ $(BIN_MODEL)
+$(SEG_MODELFILE):
 	wget -O $@ $(SEG_MODEL)
-
-models_ocr_v0_5_0: models_ocr_v0_5_0.tar.gz
-	tar zxf models_ocr_v0_5_0.tar.gz
-
-models_ocr_v0_5_0.tar.gz:
+$(OCR_MODELFILE):
 	wget -O $@ $(OCR_MODEL)

-default-2021-03-09: $(notdir $(BIN_MODEL))
-	unzip $(notdir $(BIN_MODEL))
+$(BIN_MODELNAME): $(BIN_MODELFILE)
 	mkdir $@
-	mv $(basename $(notdir $(BIN_MODEL))) $@
-
-$(notdir $(BIN_MODEL)):
-	wget $(BIN_MODEL)
+	unzip -d $@ $<
+$(SEG_MODELNAME): $(SEG_MODELFILE)
+	tar zxf $<
+$(OCR_MODELNAME): $(OCR_MODELFILE)
+	tar zxf $<

 build:
 	$(PIP) install build

@@ -82,7 +85,10 @@ install:
 install-dev:
 	$(PIP) install -e .$(and $(EXTRAS),[$(EXTRAS)])

-deps-test: models_layout_v0_5_0
+ifeq (OCR,$(findstring OCR, $(EXTRAS)))
+deps-test: $(OCR_MODELNAME)
+endif
+deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
 	$(PIP) install -r requirements-test.txt

 smoke-test: TMPDIR != mktemp -d

@@ -123,9 +129,9 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
 	$(RM) -r $(TMPDIR)

 # Run unit tests
-test: export MODELS_LAYOUT=$(CURDIR)/models_layout_v0_5_0
-test: export MODELS_OCR=$(CURDIR)/models_ocr_v0_5_0
-test: export MODELS_BIN=$(CURDIR)/default-2021-03-09
+test: export MODELS_LAYOUT=$(CURDIR)/$(SEG_MODELNAME)
+test: export MODELS_OCR=$(CURDIR)/$(OCR_MODELNAME)
+test: export MODELS_BIN=$(CURDIR)/$(BIN_MODELNAME)
 test:
 	$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)


@@ -1,4 +1,4 @@
 pytest
-pytest-subtests
+pytest-isolate
 coverage[toml]
 black


@@ -5,5 +5,4 @@ scikit-learn >= 0.23.2
 tensorflow < 2.13
 numba <= 0.58.1
 scikit-image
-loky
 biopython

File diff suppressed because it is too large


@@ -1,3 +1,5 @@
+from typing import Tuple
+from logging import getLogger
 import time
 import math

@@ -298,9 +300,17 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
             x_end_with_child_without_mother,
             new_main_sep_y)

+def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
+    return (box[1], box[1] + box[3],
+            box[0], box[0] + box[2])
+
+def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]:
+    return (slice(box[1], box[1] + box[3]),
+            slice(box[0], box[0] + box[2]))
+
 def crop_image_inside_box(box, img_org_copy):
-    image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
-    return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
+    image_box = img_org_copy[box2slice(box)]
+    return image_box, box2rect(box)

 def otsu_copy_binary(img):
     img_r = np.zeros((img.shape[0], img.shape[1], 3))
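As an aside, a tiny usage sketch of the two helpers introduced above (their definitions copied here so it runs standalone): boxes come in OpenCV order (x, y, width, height); box2slice converts to row/column slices, box2rect to (y0, y1, x0, x1).

from typing import Tuple
import numpy as np

def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
    return (box[1], box[1] + box[3], box[0], box[0] + box[2])

def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]:
    return (slice(box[1], box[1] + box[3]), slice(box[0], box[0] + box[2]))

box = (10, 20, 30, 40)            # x=10, y=20, width=30, height=40
img = np.zeros((100, 100, 3), dtype=np.uint8)
crop = img[box2slice(box)]        # rows 20:60, columns 10:40
assert crop.shape[:2] == (40, 30)
assert box2rect(box) == (20, 60, 10, 40)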
@@ -373,6 +383,10 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
     return np.std(z)

 def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
+    if not regions_without_separators.any():
+        return 0, []
+    #plt.imshow(regions_without_separators)
+    #plt.show()
     regions_without_separators_0 = regions_without_separators.sum(axis=0)
     ##plt.plot(regions_without_separators_0)
     ##plt.show()

@@ -392,6 +406,9 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
     zneg = gaussian_filter1d(zneg, sigma_)

     peaks_neg, _ = find_peaks(zneg, height=0)
+    #plt.plot(zneg)
+    #plt.plot(peaks_neg, zneg[peaks_neg], 'rx')
+    #plt.show()
     peaks, _ = find_peaks(z, height=0)
     peaks_neg = peaks_neg - 10 - 10

@@ -406,9 +423,13 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
                           (peaks_neg < (regions_without_separators.shape[1] - 370))]
     interest_pos = z[peaks]
     interest_pos = interest_pos[interest_pos > 10]
+    if not interest_pos.any():
+        return 0, []
     # plt.plot(z)
     # plt.show()
     interest_neg = z[peaks_neg]
+    if not interest_neg.any():
+        return 0, []

     min_peaks_pos = np.min(interest_pos)
     max_peaks_pos = np.max(interest_pos)

@@ -955,11 +976,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
         regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom,
                                                              regions_model_full.shape[0] // zoom),
                                         interpolation=cv2.INTER_NEAREST)
-        contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent]
+        contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent]
         ###
     cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
-        find_new_features_of_contours(contours_only_text_parent)
+        find_new_features_of_contours(contours_only_text_parent_z)

     length_con=x_max_main-x_min_main
     height_con=y_max_main-y_min_main

@@ -982,8 +1003,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     contours_only_text_parent_main_d=[]
     contours_only_text_parent_head_d=[]

-    for ii in range(len(contours_only_text_parent)):
-        con=contours_only_text_parent[ii]
+    for ii, con in enumerate(contours_only_text_parent_z):
         img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3))
         img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))

@@ -992,25 +1012,30 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
                          (regions_model_full[:,:,0]==2)).sum()
         pixels_main = all_pixels - pixels_header

-        if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ):
-            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
-            contours_only_text_parent_head.append(con)
+        if (( pixels_header / float(pixels_main) >= 0.6 and
+              length_con[ii] / float(height_con[ii]) >= 1.3 and
+              length_con[ii] / float(height_con[ii]) <= 3 ) or
+            ( pixels_header / float(pixels_main) >= 0.3 and
+              length_con[ii] / float(height_con[ii]) >= 3 )):
+            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 2
+            contours_only_text_parent_head.append(contours_only_text_parent[ii])
+            conf_contours_head.append(None) # why not conf_contours[ii], too?
             if contours_only_text_parent_d_ordered is not None:
                 contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
             all_box_coord_head.append(all_box_coord[ii])
             slopes_head.append(slopes[ii])
             all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
-            conf_contours_head.append(None)
         else:
-            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
-            contours_only_text_parent_main.append(con)
+            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 1
+            contours_only_text_parent_main.append(contours_only_text_parent[ii])
             conf_contours_main.append(conf_contours[ii])
             if contours_only_text_parent_d_ordered is not None:
                 contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
             all_box_coord_main.append(all_box_coord[ii])
             slopes_main.append(slopes[ii])
             all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
         #print(all_pixels,pixels_main,pixels_header)

     ### to make it faster

@@ -1018,8 +1043,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
     #     regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
     #                                           regions_model_full.shape[0] // zoom),
     #                                     interpolation=cv2.INTER_NEAREST)
-    contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
-    contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
     ###

     return (regions_model_1,

@@ -1626,12 +1649,19 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,

 def return_boxes_of_images_by_order_of_reading_new(
         splitter_y_new, regions_without_separators,
         matrix_of_lines_ch,
-        num_col_classifier, erosion_hurts, tables, right2left_readingorder):
+        num_col_classifier, erosion_hurts, tables,
+        right2left_readingorder,
+        logger=None):

     if right2left_readingorder:
         regions_without_separators = cv2.flip(regions_without_separators,1)
+    if logger is None:
+        logger = getLogger(__package__)
+    logger.debug('enter return_boxes_of_images_by_order_of_reading_new')
     boxes=[]
     peaks_neg_tot_tables = []
+    splitter_y_new = np.array(splitter_y_new, dtype=int)
     for i in range(len(splitter_y_new)-1):
         #print(splitter_y_new[i],splitter_y_new[i+1])
         matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) &

@@ -1644,24 +1674,19 @@ def return_boxes_of_images_by_order_of_reading_new(
         #           0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))):
         if True:
             try:
-                if erosion_hurts:
-                    num_col, peaks_neg_fin = find_num_col(
-                        regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
-                        num_col_classifier, tables, multiplier=6.)
-                else:
-                    num_col, peaks_neg_fin = find_num_col(
-                        regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
-                        num_col_classifier, tables, multiplier=7.)
+                num_col, peaks_neg_fin = find_num_col(
+                    regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
+                    num_col_classifier, tables, multiplier=6. if erosion_hurts else 7.)
             except:
                 peaks_neg_fin=[]
                 num_col = 0
             try:
-                peaks_neg_fin_org=np.copy(peaks_neg_fin)
                 if (len(peaks_neg_fin)+1)<num_col_classifier or num_col_classifier==6:
                     #print('burda')
+                    peaks_neg_fin_org = np.copy(peaks_neg_fin)
                     if len(peaks_neg_fin)==0:
                         num_col, peaks_neg_fin = find_num_col(
-                            regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
+                            regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
                             num_col_classifier, tables, multiplier=3.)
                     peaks_neg_fin_early=[]
                     peaks_neg_fin_early.append(0)

@@ -1674,21 +1699,21 @@ def return_boxes_of_images_by_order_of_reading_new(
                     peaks_neg_fin_rev=[]
                     for i_n in range(len(peaks_neg_fin_early)-1):
                         #print(i_n,'i_n')
-                        #plt.plot(regions_without_separators[int(splitter_y_new[i]):
-                        #                                    int(splitter_y_new[i+1]),
+                        #plt.plot(regions_without_separators[splitter_y_new[i]:
+                        #                                    splitter_y_new[i+1],
                         #                                    peaks_neg_fin_early[i_n]:
                         #                                    peaks_neg_fin_early[i_n+1]].sum(axis=0) )
                         #plt.show()
                         try:
                             num_col, peaks_neg_fin1 = find_num_col(
-                                regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
+                                regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
                                                            peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
                                 num_col_classifier,tables, multiplier=7.)
                         except:
                             peaks_neg_fin1=[]
                         try:
                             num_col, peaks_neg_fin2 = find_num_col(
-                                regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
+                                regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
                                                            peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
                                 num_col_classifier,tables, multiplier=5.)
                         except:

@@ -1714,9 +1739,9 @@ def return_boxes_of_images_by_order_of_reading_new(
                         #print(peaks_neg_fin,'peaks_neg_fin')
             except:
-                pass
+                logger.exception("cannot find peaks consistent with columns")
                 #num_col, peaks_neg_fin = find_num_col(
-                #    regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
+                #    regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],:],
                 #    multiplier=7.0)
             x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
             x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]

@@ -1738,31 +1763,28 @@ def return_boxes_of_images_by_order_of_reading_new(
                 y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
                 new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
                     x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff)
+            x_starting = np.array(x_starting)
+            x_ending = np.array(x_ending)
+            y_type_2 = np.array(y_type_2)
+            y_diff_type_2 = np.array(y_diff_type_2)
+            all_columns = set(range(len(peaks_neg_tot) - 1))
             if ((reading_order_type==1) or
                 (reading_order_type==0 and
                  (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))):
                 try:
-                    y_grenze=int(splitter_y_new[i])+300
+                    y_grenze = splitter_y_new[i] + 300
                     #check if there is a big separator in this y_mains_sep_ohne_grenzen

                     args_early_ys=np.arange(len(y_type_2))
                     #print(args_early_ys,'args_early_ys')
-                    #print(int(splitter_y_new[i]),int(splitter_y_new[i+1]))
+                    #print(splitter_y_new[i], splitter_y_new[i+1])

-                    x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) &
+                    x_starting_up = x_starting[(y_type_2 > splitter_y_new[i]) &
                                                (y_type_2 <= y_grenze)]
-                    x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) &
+                    x_ending_up = x_ending[(y_type_2 > splitter_y_new[i]) &
                                            (y_type_2 <= y_grenze)]
-                    y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                    y_type_2_up = y_type_2[(y_type_2 > splitter_y_new[i]) &
                                            (y_type_2 <= y_grenze)]
-                    y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                    y_diff_type_2_up = y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
                                                      (y_type_2 <= y_grenze)]
-                    args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) &
+                    args_up = args_early_ys[(y_type_2 > splitter_y_new[i]) &
                                             (y_type_2 <= y_grenze)]
                     if len(y_type_2_up) > 0:
                         y_main_separator_up = y_type_2_up [(x_starting_up==0) &

@@ -1776,8 +1798,8 @@ def return_boxes_of_images_by_order_of_reading_new(
                             args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
                             #print(args_to_be_kept,'args_to_be_kept')
                             boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
-                                          int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))])
-                            splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0]
+                                          splitter_y_new[i], y_diff_main_separator_up.max()])
+                            splitter_y_new[i] = y_diff_main_separator_up.max()
                             #print(splitter_y_new[i],'splitter_y_new[i]')

                             y_type_2 = y_type_2[args_to_be_kept]

@@ -1786,29 +1808,28 @@ def return_boxes_of_images_by_order_of_reading_new(
                             y_diff_type_2 = y_diff_type_2[args_to_be_kept]

                         #print('galdiha')
-                        y_grenze=int(splitter_y_new[i])+200
+                        y_grenze = splitter_y_new[i] + 200
                         args_early_ys2=np.arange(len(y_type_2))
-                        y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                        y_type_2_up=y_type_2[(y_type_2 > splitter_y_new[i]) &
                                              (y_type_2 <= y_grenze)]
-                        x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) &
+                        x_starting_up=x_starting[(y_type_2 > splitter_y_new[i]) &
                                                  (y_type_2 <= y_grenze)]
-                        x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) &
+                        x_ending_up=x_ending[(y_type_2 > splitter_y_new[i]) &
                                              (y_type_2 <= y_grenze)]
-                        y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
+                        y_diff_type_2_up=y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
                                                        (y_type_2 <= y_grenze)]
-                        args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) &
+                        args_up2=args_early_ys2[(y_type_2 > splitter_y_new[i]) &
                                                 (y_type_2 <= y_grenze)]
                         #print(y_type_2_up,x_starting_up,x_ending_up,'didid')
-                        nodes_in = []
+                        nodes_in = set()
                         for ij in range(len(x_starting_up)):
-                            nodes_in = nodes_in + list(range(int(x_starting_up[ij]),
-                                                             int(x_ending_up[ij])))
-                        nodes_in = np.unique(nodes_in)
+                            nodes_in.update(range(x_starting_up[ij],
+                                                  x_ending_up[ij]))
                         #print(nodes_in,'nodes_in')

-                        if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
+                        if nodes_in == set(range(len(peaks_neg_tot)-1)):
                             pass
-                        elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)):
+                        elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
                             pass
                         else:
                             #print('burdaydikh')

@@ -1823,17 +1844,16 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 pass
                                 #print('burdaydikh2')
                     elif len(y_diff_main_separator_up)==0:
-                        nodes_in = []
+                        nodes_in = set()
                         for ij in range(len(x_starting_up)):
-                            nodes_in = nodes_in + list(range(int(x_starting_up[ij]),
-                                                             int(x_ending_up[ij])))
-                        nodes_in = np.unique(nodes_in)
+                            nodes_in.update(range(x_starting_up[ij],
+                                                  x_ending_up[ij]))
                         #print(nodes_in,'nodes_in2')
                         #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')

-                        if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
+                        if nodes_in == set(range(len(peaks_neg_tot)-1)):
                             pass
-                        elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)):
+                        elif nodes_in == set(range(1,len(peaks_neg_tot)-1)):
                             pass
                         else:
                             #print('burdaydikh')
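The refactor in the two hunks above replaces list concatenation plus np.unique with a plain set. For illustration, a standalone sketch of the same column-coverage test (variable names are made up):

# Which columns are covered by the given (start, end) separator spans?
spans = [(0, 2), (2, 5)]
n_columns = 5

covered = set()
for start, end in spans:
    covered.update(range(start, end))         # set.update replaces list + np.unique

print(covered == set(range(n_columns)))       # True: all columns covered
print(covered == set(range(1, n_columns)))    # False: not "all but the first"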
@@ -1858,26 +1878,25 @@ def return_boxes_of_images_by_order_of_reading_new(
                 x_end_by_order=[]
                 if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1:
                     if reading_order_type==1:
-                        y_lines_by_order.append(int(splitter_y_new[i]))
+                        y_lines_by_order.append(splitter_y_new[i])
                         x_start_by_order.append(0)
                         x_end_by_order.append(len(peaks_neg_tot)-2)
                     else:
                         #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                        columns_covered_by_mothers = []
+                        columns_covered_by_mothers = set()
                         for dj in range(len(x_start_without_mother)):
-                            columns_covered_by_mothers = columns_covered_by_mothers + \
-                                list(range(int(x_start_without_mother[dj]),
-                                           int(x_end_without_mother[dj])))
-                        columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                        all_columns=np.arange(len(peaks_neg_tot)-1)
-                        columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
-                        y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
-                        ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                            columns_covered_by_mothers.update(
+                                range(x_start_without_mother[dj],
+                                      x_end_without_mother[dj]))
+                        columns_not_covered = list(all_columns - columns_covered_by_mothers)
+                        y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) +
+                                                               len(x_start_without_mother),
+                                                               dtype=int) * splitter_y_new[i])
+                        ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                         ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                        x_starting = np.append(x_starting, columns_not_covered)
+                        x_starting = np.append(x_starting, np.array(columns_not_covered, int))
                         x_starting = np.append(x_starting, x_start_without_mother)
-                        x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                        x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                         x_ending = np.append(x_ending, x_end_without_mother)

                         ind_args=np.arange(len(y_type_2))

@@ -1906,42 +1925,39 @@ def return_boxes_of_images_by_order_of_reading_new(
                             x_end_by_order.append(x_end_column_sort[ii]-1)
                 else:
                     #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                    columns_covered_by_mothers = []
+                    columns_covered_by_mothers = set()
                     for dj in range(len(x_start_without_mother)):
-                        columns_covered_by_mothers = columns_covered_by_mothers + \
-                            list(range(int(x_start_without_mother[dj]),
-                                       int(x_end_without_mother[dj])))
-                    columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                    all_columns=np.arange(len(peaks_neg_tot)-1)
-                    columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
-                    y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
-                    ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                        columns_covered_by_mothers.update(
+                            range(x_start_without_mother[dj],
+                                  x_end_without_mother[dj]))
+                    columns_not_covered = list(all_columns - columns_covered_by_mothers)
+                    y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother),
+                                                           dtype=int) * splitter_y_new[i])
+                    ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                     ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                    x_starting = np.append(x_starting, columns_not_covered)
+                    x_starting = np.append(x_starting, np.array(columns_not_covered, int))
                     x_starting = np.append(x_starting, x_start_without_mother)
-                    x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                    x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                     x_ending = np.append(x_ending, x_end_without_mother)

-                    columns_covered_by_with_child_no_mothers = []
+                    columns_covered_by_with_child_no_mothers = set()
                     for dj in range(len(x_end_with_child_without_mother)):
-                        columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \
-                            list(range(int(x_start_with_child_without_mother[dj]),
-                                       int(x_end_with_child_without_mother[dj])))
-                    columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers))
-
-                    all_columns = np.arange(len(peaks_neg_tot)-1)
-                    columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers))
+                        columns_covered_by_with_child_no_mothers.update(
+                            range(x_start_with_child_without_mother[dj],
+                                  x_end_with_child_without_mother[dj]))
+                    columns_not_covered_child_no_mother = list(
+                        all_columns - columns_covered_by_with_child_no_mothers)
                     #indexes_to_be_spanned=[]
                     for i_s in range(len(x_end_with_child_without_mother)):
                         columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
                     columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother)
                     ind_args = np.arange(len(y_type_2))
-                    x_end_with_child_without_mother = np.array(x_end_with_child_without_mother)
-                    x_start_with_child_without_mother = np.array(x_start_with_child_without_mother)
+                    x_end_with_child_without_mother = np.array(x_end_with_child_without_mother, int)
+                    x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int)
                     for i_s_nc in columns_not_covered_child_no_mother:
                         if i_s_nc in x_start_with_child_without_mother:
-                            x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
+                            x_end_biggest_column = \
+                                x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
                             args_all_biggest_lines = ind_args[(x_starting==i_s_nc) &
                                                               (x_ending==x_end_biggest_column)]
                             y_column_nc = y_type_2[args_all_biggest_lines]

@@ -1951,7 +1967,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                             for i_c in range(len(y_column_nc)):
                                 if i_c==(len(y_column_nc)-1):
                                     ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) &
-                                                                         (y_type_2<int(splitter_y_new[i+1])) &
+                                                                         (y_type_2<splitter_y_new[i+1]) &
                                                                          (x_starting>=i_s_nc) &
                                                                          (x_ending<=x_end_biggest_column)]
                                 else:

@@ -1967,21 +1983,19 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 if len(x_diff_all_between_nm_wc)>0:
                                     biggest=np.argmax(x_diff_all_between_nm_wc)

-                                columns_covered_by_mothers = []
+                                columns_covered_by_mothers = set()
                                 for dj in range(len(x_starting_all_between_nm_wc)):
-                                    columns_covered_by_mothers = columns_covered_by_mothers + \
-                                        list(range(int(x_starting_all_between_nm_wc[dj]),
-                                                   int(x_ending_all_between_nm_wc[dj])))
-                                columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-
-                                all_columns=np.arange(i_s_nc, x_end_biggest_column)
-                                columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
+                                    columns_covered_by_mothers.update(
+                                        range(x_starting_all_between_nm_wc[dj],
+                                              x_ending_all_between_nm_wc[dj]))
+                                child_columns = set(range(i_s_nc, x_end_biggest_column))
+                                columns_not_covered = list(child_columns - columns_covered_by_mothers)

                                 should_longest_line_be_extended=0
                                 if (len(x_diff_all_between_nm_wc) > 0 and
-                                    set(list(range(int(x_starting_all_between_nm_wc[biggest]),
-                                                   int(x_ending_all_between_nm_wc[biggest]))) +
-                                        list(columns_not_covered)) != set(all_columns)):
+                                    set(list(range(x_starting_all_between_nm_wc[biggest],
+                                                   x_ending_all_between_nm_wc[biggest])) +
+                                        list(columns_not_covered)) != child_columns):
                                     should_longest_line_be_extended=1

                                     index_lines_so_close_to_top_separator = \
                                         np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) &

@@ -1991,9 +2005,12 @@ def return_boxes_of_images_by_order_of_reading_new(
                                         np.array(list(set(list(range(len(y_all_between_nm_wc)))) -
                                                       set(list(index_lines_so_close_to_top_separator))))
                                     if len(indexes_remained_after_deleting_closed_lines) > 0:
-                                        y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
-                                        x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
-                                        x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                        y_all_between_nm_wc = \
+                                            y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                        x_starting_all_between_nm_wc = \
+                                            x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
+                                        x_ending_all_between_nm_wc = \
+                                            x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]

                                     y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c])
                                     x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc)

@@ -2005,11 +2022,11 @@ def return_boxes_of_images_by_order_of_reading_new(
                                         x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
                                         x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
                                     except:
-                                        pass
+                                        logger.exception("cannot append")
                                 y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
-                                x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered)
-                                x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1)
+                                x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
+                                x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)

                                 ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
                                 for column in range(int(i_s_nc), int(x_end_biggest_column)):

@@ -2078,52 +2095,50 @@ def return_boxes_of_images_by_order_of_reading_new(
                                 if len(y_in_cols)>0:
                                     y_down=np.min(y_in_cols)
                                 else:
-                                    y_down=[int(splitter_y_new[i+1])][0]
+                                    y_down=splitter_y_new[i+1]
                                 #print(y_itself,'y_itself')
                                 boxes.append([peaks_neg_tot[column],
                                               peaks_neg_tot[column+1],
                                               y_itself,
                                               y_down])
             except:
+                logger.exception("cannot assign boxes")
                 boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
-                              int(splitter_y_new[i]), int(splitter_y_new[i+1])])
+                              splitter_y_new[i], splitter_y_new[i+1]])
         else:
             y_lines_by_order=[]
             x_start_by_order=[]
             x_end_by_order=[]
             if len(x_starting)>0:
-                all_columns = np.arange(len(peaks_neg_tot)-1)
-                columns_covered_by_lines_covered_more_than_2col = []
+                columns_covered_by_lines_covered_more_than_2col = set()
                 for dj in range(len(x_starting)):
-                    if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns):
-                        pass
-                    else:
-                        columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \
-                            list(range(int(x_starting[dj]),int(x_ending[dj]) ))
-                columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col))
-                columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col))
+                    if set(range(x_starting[dj], x_ending[dj])) != all_columns:
+                        columns_covered_by_lines_covered_more_than_2col.update(
+                            range(x_starting[dj], x_ending[dj]))
+                columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col)

-                y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
-                ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + 1,
+                                                       dtype=int) * splitter_y_new[i])
+                ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                 ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                x_starting = np.append(x_starting, columns_not_covered)
-                x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)

                 if len(new_main_sep_y) > 0:
                     x_starting = np.append(x_starting, 0)
-                    x_ending = np.append(x_ending, len(peaks_neg_tot)-1)
+                    x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
                 else:
                     x_starting = np.append(x_starting, x_starting[0])
                     x_ending = np.append(x_ending, x_ending[0])
             else:
-                all_columns = np.arange(len(peaks_neg_tot)-1)
-                columns_not_covered = list(set(all_columns))
-                y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
-                ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
+                columns_not_covered = list(all_columns)
+                y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered),
+                                                       dtype=int) * splitter_y_new[i])
+                ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
                 ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                x_starting = np.append(x_starting, columns_not_covered)
-                x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
+                x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)

-            ind_args=np.array(range(len(y_type_2)))
+            ind_args = np.arange(len(y_type_2))

             for column in range(len(peaks_neg_tot)-1):
                 #print(column,'column')

@@ -2155,7 +2170,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                 x_start_itself=x_start_copy.pop(il)
                 x_end_itself=x_end_copy.pop(il)

-                for column in range(int(x_start_itself), int(x_end_itself)+1):
+                for column in range(x_start_itself, x_end_itself+1):
                     #print(column,'cols')
                     y_in_cols=[]
                     for yic in range(len(y_copy)):

@@ -2169,7 +2184,7 @@ def return_boxes_of_images_by_order_of_reading_new(
                     if len(y_in_cols)>0:
                         y_down=np.min(y_in_cols)
                     else:
-                        y_down=[int(splitter_y_new[i+1])][0]
+                        y_down=splitter_y_new[i+1]
                     #print(y_itself,'y_itself')
                     boxes.append([peaks_neg_tot[column],
                                   peaks_neg_tot[column+1],

@@ -2191,9 +2206,10 @@ def return_boxes_of_images_by_order_of_reading_new(
             x_end_new = regions_without_separators.shape[1] - boxes[i][0]
             boxes[i][0] = x_start_new
             boxes[i][1] = x_end_new
-        return boxes, peaks_neg_tot_tables_new
-    else:
-        return boxes, peaks_neg_tot_tables
+        peaks_neg_tot_tables = peaks_neg_tot_tables_new

+    logger.debug('exit return_boxes_of_images_by_order_of_reading_new')
+    return boxes, peaks_neg_tot_tables

 def is_image_filename(fname: str) -> bool:
     return fname.lower().endswith(('.jpg',
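The signature change in the hunks above follows a common optional-logger pattern (pass your own logger, or fall back to the package logger) and replaces bare `pass` in exception handlers with `logger.exception(...)`. A minimal standalone sketch of both ideas, with illustrative names (not eynollah code):

from logging import getLogger, basicConfig, DEBUG

def process(data, logger=None):
    if logger is None:
        logger = getLogger(__package__)      # fall back to the package logger
    logger.debug("enter process")
    try:
        return sum(data)
    except TypeError:
        logger.exception("cannot sum data")  # log instead of silently passing
        return 0

basicConfig(level=DEBUG)
print(process([1, 2, 3]))    # 6, via the default logger
print(process([1, "x"]))     # exception is logged, 0 is returned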


@ -1,7 +1,15 @@
from typing import Sequence, Union
from numbers import Number
from functools import partial from functools import partial
import itertools
import cv2 import cv2
import numpy as np import numpy as np
from shapely import geometry from scipy.sparse.csgraph import minimum_spanning_tree
from shapely.geometry import Polygon, LineString
from shapely.geometry.polygon import orient
from shapely import set_precision
from shapely.ops import unary_union, nearest_points
from .rotate import rotate_image, rotation_image_new from .rotate import rotate_image, rotation_image_new
@ -37,29 +45,28 @@ def get_text_region_boxes_by_given_contours(contours):
return boxes, contours_new return boxes, contours_new
def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
found_polygons_early = [] found_polygons_early = []
for jv,c in enumerate(contours): for jv, contour in enumerate(contours):
if len(c) < 3: # A polygon cannot have less than 3 points if len(contour) < 3: # A polygon cannot have less than 3 points
continue continue
polygon = geometry.Polygon([point[0] for point in c]) polygon = contour2polygon(contour, dilate=dilate)
area = polygon.area area = polygon.area
if (area >= min_area * np.prod(image.shape[:2]) and if (area >= min_area * np.prod(image.shape[:2]) and
area <= max_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and
hierarchy[0][jv][3] == -1): hierarchy[0][jv][3] == -1):
found_polygons_early.append(np.array([[point] found_polygons_early.append(polygon2contour(polygon))
for point in polygon.exterior.coords], dtype=np.uint))
return found_polygons_early return found_polygons_early
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
found_polygons_early = [] found_polygons_early = []
for jv,c in enumerate(contours): for jv, contour in enumerate(contours):
if len(c) < 3: # A polygon cannot have less than 3 points if len(contour) < 3: # A polygon cannot have less than 3 points
continue continue
polygon = geometry.Polygon([point[0] for point in c]) polygon = contour2polygon(contour, dilate=dilate)
# area = cv2.contourArea(c) # area = cv2.contourArea(contour)
area = polygon.area area = polygon.area
##print(np.prod(thresh.shape[:2])) ##print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area # Check that polygon has area greater than minimal area
@ -68,9 +75,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
area <= max_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and
# hierarchy[0][jv][3]==-1 # hierarchy[0][jv][3]==-1
True): True):
# print(c[0][0][1]) # print(contour[0][0][1])
found_polygons_early.append(np.array([[point] found_polygons_early.append(polygon2contour(polygon))
for point in polygon.exterior.coords], dtype=np.int32))
return found_polygons_early return found_polygons_early
def find_new_features_of_contours(contours_main): def find_new_features_of_contours(contours_main):
@ -135,12 +141,12 @@ def return_parent_contours(contours, hierarchy):
if hierarchy[0][i][3] == -1] if hierarchy[0][i][3] == -1]
return contours_parent return contours_parent
def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -247,30 +253,26 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
return cont_int[0], index_r_con, confidence_contour return cont_int[0], index_r_con, confidence_contour
def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map): def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
if not len(cnts): if not len(cnts):
return [], [] return [], []
confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
#cnts = cnts/2
cnts = [(i/6).astype(int) for i in cnts]
results = map(partial(do_back_rotation_and_get_cnt_back,
img=img,
slope_first=slope_first,
confidence_matrix=confidence_matrix,
),
cnts, range(len(cnts)))
contours, indexes, conf_contours = tuple(zip(*results))
return [i*6 for i in contours], list(conf_contours)
def return_contours_of_interested_textline(region_pre_p, pixel): confidence_matrix = cv2.resize(confidence_matrix,
(img.shape[1] // 6, img.shape[0] // 6),
interpolation=cv2.INTER_NEAREST)
confs = []
for cnt in cnts:
cnt_mask = np.zeros(confidence_matrix.shape)
cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0)
confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
return cnts, confs
def return_contours_of_interested_textline(region_pre_p, label):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -293,12 +295,12 @@ def return_contours_of_image(image):
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return contours, hierarchy return contours, hierarchy
def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -311,12 +313,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si
return contours_imgs return contours_imgs
def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 cnts_images = (region_pre_p[:, :, 0] == label) * 1
else: else:
cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = (region_pre_p[:, :] == label) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -332,3 +334,97 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
return img_ret[:, :, 0] return img_ret[:, :, 0]
def dilate_textline_contours(all_found_textline_polygons):
return [[polygon2contour(contour2polygon(contour, dilate=6))
for contour in region]
for region in all_found_textline_polygons]
def dilate_textregion_contours(all_found_textline_polygons):
return [polygon2contour(contour2polygon(contour, dilate=6))
for contour in all_found_textline_polygons]
def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
polygon = Polygon([point[0] for point in contour])
if dilate:
polygon = polygon.buffer(dilate)
if polygon.geom_type == 'GeometryCollection':
# heterogeneous result: filter zero-area shapes (LineString, Point)
polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0])
if polygon.geom_type == 'MultiPolygon':
# homogeneous result: construct convex hull to connect
polygon = join_polygons(polygon.geoms)
return make_valid(polygon)
def polygon2contour(polygon: Polygon) -> np.ndarray:
polygon = np.array(polygon.exterior.coords[:-1], dtype=int)
return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis]
def make_valid(polygon: Polygon) -> Polygon:
"""Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
def isint(x):
return isinstance(x, int) or int(x) == x
# make sure rounding does not invalidate
if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0:
polygon = Polygon(np.round(polygon.exterior.coords))
points = list(polygon.exterior.coords[:-1])
# try by re-arranging points
for split in range(1, len(points)):
if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
break
# simplification may not be possible (at all) due to ordering
# in that case, try another starting point
polygon = Polygon(points[-split:]+points[:-split])
# try by simplification
for tolerance in range(int(polygon.area + 1.5)):
if polygon.is_valid:
break
# simplification may require a larger tolerance
polygon = polygon.simplify(tolerance + 1)
# try by enlarging
for tolerance in range(1, int(polygon.area + 2.5)):
if polygon.is_valid:
break
# enlargement may require a larger tolerance
polygon = polygon.buffer(tolerance)
assert polygon.is_valid, polygon.wkt
return polygon
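Editor's note (not part of the diff): a sketch of what make_valid() guarantees, using a self-intersecting "bowtie" as input (assumes shapely >= 2; the example polygon is illustrative):

from shapely.geometry import Polygon

bowtie = Polygon([(0, 0), (4, 4), (4, 0), (0, 4)])  # edges cross, so invalid
assert not bowtie.is_valid
repaired = make_valid(bowtie)  # rearrange/simplify/enlarge until valid
assert repaired.is_valid       # guaranteed by the assert inside make_valid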
def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon:
"""construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
# ensure input polygons are simply typed and all oriented equally
polygons = [orient(poly)
for poly in itertools.chain.from_iterable(
[poly.geoms
if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
else [poly]
for poly in polygons])]
npoly = len(polygons)
if npoly == 1:
return polygons[0]
    # find minimum-weight set of bridges connecting all polygons (minimum spanning tree)
pairs = itertools.combinations(range(npoly), 2)
dists = np.zeros((npoly, npoly), dtype=float)
for i, j in pairs:
dist = polygons[i].distance(polygons[j])
if dist < 1e-5:
dist = 1e-5 # if pair merely touches, we still need to get an edge
dists[i, j] = dist
dists[j, i] = dist
dists = minimum_spanning_tree(dists, overwrite=True)
# add bridge polygons (where necessary)
for prevp, nextp in zip(*dists.nonzero()):
prevp = polygons[prevp]
nextp = polygons[nextp]
nearest = nearest_points(prevp, nextp)
bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1)
polygons.append(bridgep)
jointp = unary_union(polygons)
assert jointp.geom_type == 'Polygon', jointp.wkt
# follow-up calculations will necessarily be integer;
# so anticipate rounding here and then ensure validity
jointp2 = set_precision(jointp, 1.0)
if jointp2.geom_type != 'Polygon' or not jointp2.is_valid:
jointp2 = Polygon(np.round(jointp.exterior.coords))
jointp2 = make_valid(jointp2)
assert jointp2.geom_type == 'Polygon', jointp2.wkt
return jointp2
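Editor's note (not part of the diff): joining two disjoint boxes; the spanning-tree step adds one bridge between their nearest points so the union comes out as a single valid Polygon (inputs are illustrative):

from shapely.geometry import box

parts = [box(0, 0, 10, 10), box(30, 0, 40, 10)]  # 20px gap between the boxes
joint = join_polygons(parts, scale=20)
assert joint.geom_type == 'Polygon'  # one concave hull covering both parts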

View file

@ -99,6 +99,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
except: except:
point_left=first_nonzero point_left=first_nonzero
if point_left == first_nonzero and point_right == last_nonzero:
return text_regions
if point_right>=mask_marginals.shape[1]: if point_right>=mask_marginals.shape[1]:

View file

@ -17,9 +17,12 @@ from .contour import (
return_contours_of_interested_textline, return_contours_of_interested_textline,
find_contours_mean_y_diff, find_contours_mean_y_diff,
) )
from .shm import share_ndarray, wrap_ndarray_shared
from . import ( from . import (
find_num_col_deskew, find_num_col_deskew,
crop_image_inside_box, crop_image_inside_box,
box2rect,
box2slice,
) )
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@ -64,7 +67,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -75,11 +79,14 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
clusters_to_be_deleted = [] clusters_to_be_deleted = []
if len(arg_diff_cluster) > 0: if len(arg_diff_cluster) > 0:
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) clusters_to_be_deleted.append(
arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
for i in range(len(arg_diff_cluster) - 1): for i in range(len(arg_diff_cluster) - 1):
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : clusters_to_be_deleted.append(
arg_diff_cluster[i + 1] + 1]) arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) arg_diff_cluster[i + 1] + 1])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
if len(clusters_to_be_deleted) > 0: if len(clusters_to_be_deleted) > 0:
peaks_new_extra = [] peaks_new_extra = []
for m in range(len(clusters_to_be_deleted)): for m in range(len(clusters_to_be_deleted)):
@ -176,7 +183,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -236,7 +244,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
try: try:
neg_peaks_max=np.max(y_padded_smoothed[peaks]) neg_peaks_max=np.max(y_padded_smoothed[peaks])
arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42]
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -313,23 +322,36 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) point_down =y_max_cont-1
##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else: else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) point_down =y_max_cont-1
##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int( point_down_narrow = peaks[jj] + first_nonzero + int(
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) 1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
else: else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else: else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int( point_down_narrow = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
@ -338,7 +360,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_down_narrow = img_patch.shape[0] - 2 point_down_narrow = img_patch.shape[0] - 2
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -465,7 +489,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -540,7 +565,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down) point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -610,7 +636,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg]) neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg])
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -686,30 +713,50 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1
##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down)
#point_up
# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else: else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1
##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down)
#point_up
# np.max(y_cont)
#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
else: else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
else: else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)
###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)
###-int(dis_to_next_down*1./2)
if point_down_narrow >= img_patch.shape[0]: if point_down_narrow >= img_patch.shape[0]:
point_down_narrow = img_patch.shape[0] - 2 point_down_narrow = img_patch.shape[0] - 2
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
xvinside = xv[distances >= 0] xvinside = xv[distances >= 0]
@ -798,7 +845,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next) point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -863,7 +911,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down) point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, distances = [cv2.pointPolygonTest(contour_text_interest_copy,
tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])),
True)
for mj in range(len(xv))] for mj in range(len(xv))]
distances = np.array(distances) distances = np.array(distances)
@ -947,7 +996,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -960,8 +1010,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
if len(arg_diff_cluster) > 0: if len(arg_diff_cluster) > 0:
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
for i in range(len(arg_diff_cluster) - 1): for i in range(len(arg_diff_cluster) - 1):
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:
arg_diff_cluster[i + 1] + 1])
clusters_to_be_deleted.append(
arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
if len(clusters_to_be_deleted) > 0: if len(clusters_to_be_deleted) > 0:
peaks_new_extra = [] peaks_new_extra = []
for m in range(len(clusters_to_be_deleted)): for m in range(len(clusters_to_be_deleted)):
@ -1011,7 +1064,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
try: try:
neg_peaks_max = np.max(y_padded_smoothed[peaks]) neg_peaks_max = np.max(y_padded_smoothed[peaks])
arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
@ -1287,7 +1341,9 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
return None, cont_final return None, cont_final
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): def textline_contours_postprocessing(textline_mask, slope,
contour_text_interest, box_ind,
add_boxes_coor_into_textlines=False):
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
textline_mask = textline_mask.astype(np.uint8) textline_mask = textline_mask.astype(np.uint8)
kernel = np.ones((5, 5), np.uint8) kernel = np.ones((5, 5), np.uint8)
@ -1347,24 +1403,26 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
return contours_rotated_clean return contours_rotated_clean
def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None):
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
if not np.prod(img_crop.shape):
return img_crop
if num_col == 1: if num_col == 1:
num_patches = int(img_path.shape[1] / 200.0) num_patches = int(img_crop.shape[1] / 200.0)
else: else:
num_patches = int(img_path.shape[1] / 140.0) num_patches = int(img_crop.shape[1] / 140.0)
# num_patches=int(img_path.shape[1]/200.) # num_patches=int(img_crop.shape[1]/200.)
if num_patches == 0: if num_patches == 0:
num_patches = 1 num_patches = 1
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
# plt.imshow(img_patch_ineterst) # plt.imshow(img_patch_interest)
# plt.show() # plt.show()
length_x = int(img_path.shape[1] / float(num_patches)) length_x = int(img_crop.shape[1] / float(num_patches))
    # margin = int(0.04 * length_x) just recently this was changed because it breaks lines into 2    # margin = int(0.04 * length_x) just recently this was changed because it breaks lines into 2
margin = int(0.04 * length_x) margin = int(0.04 * length_x)
# if margin<=4: # if margin<=4:
@ -1372,7 +1430,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
# margin=0 # margin=0
width_mid = length_x - 2 * margin width_mid = length_x - 2 * margin
nxf = img_path.shape[1] / float(width_mid) nxf = img_crop.shape[1] / float(width_mid)
if nxf > int(nxf): if nxf > int(nxf):
nxf = int(nxf) + 1 nxf = int(nxf) + 1
@ -1388,12 +1446,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
index_x_d = i * width_mid index_x_d = i * width_mid
index_x_u = index_x_d + length_x index_x_u = index_x_d + length_x
if index_x_u > img_path.shape[1]: if index_x_u > img_crop.shape[1]:
index_x_u = img_path.shape[1] index_x_u = img_crop.shape[1]
index_x_d = img_path.shape[1] - length_x index_x_d = img_crop.shape[1] - length_x
# img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
img_xline = img_patch_ineterst[:, index_x_d:index_x_u] img_xline = img_patch_interest[:, index_x_d:index_x_u]
try: try:
assert img_xline.any() assert img_xline.any()
@ -1409,9 +1467,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
img_line_rotated = rotate_image(img_xline, slope_xline) img_line_rotated = rotate_image(img_xline, slope_xline)
img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) img_patch_interest_revised = np.zeros(img_patch_interest.shape)
for i in range(nxf): for i in range(nxf):
if i == 0: if i == 0:
@ -1421,11 +1479,11 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
index_x_d = i * width_mid index_x_d = i * width_mid
index_x_u = index_x_d + length_x index_x_u = index_x_d + length_x
if index_x_u > img_path.shape[1]: if index_x_u > img_crop.shape[1]:
index_x_u = img_path.shape[1] index_x_u = img_crop.shape[1]
index_x_d = img_path.shape[1] - length_x index_x_d = img_crop.shape[1] - length_x
img_xline = img_patch_ineterst[:, index_x_d:index_x_u] img_xline = img_patch_interest[:, index_x_d:index_x_u]
img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0]
@ -1448,11 +1506,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]]
img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
return img_patch_ineterst_revised return img_patch_interest_revised
def do_image_rotation(angle, img, sigma_des, logger=None): @wrap_ndarray_shared(kw='img')
def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None):
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
img_rot = rotate_image(img, angle) img_rot = rotate_image(img, angle)
@ -1465,7 +1524,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None):
return var return var
def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
main_page=False, logger=None, plotter=None, map=map): main_page=False, logger=None, plotter=None, map=None):
if main_page and plotter: if main_page and plotter:
plotter.save_plot_of_textline_density(img_patch_org) plotter.save_plot_of_textline_density(img_patch_org)
@ -1479,159 +1538,75 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
#img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
#img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0],
# int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
angles = np.array([-45, 0, 45, 90,]) angles = np.array([-45, 0, 45, 90,])
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
elif main_page: elif main_page:
angles = np.array (list(np.linspace(-12, -7, int(n_tot_angles/4))) + list(np.linspace(-6, 6, n_tot_angles- 2* int(n_tot_angles/4))) + list(np.linspace(7, 12, int(n_tot_angles/4))))#np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) #angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4),
np.linspace(-6, 6, n_tot_angles // 2),
np.linspace(7, 12, n_tot_angles // 4)))
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
early_slope_edge=11 early_slope_edge=11
if abs(angle) > early_slope_edge: if abs(angle) > early_slope_edge:
if angle < 0: if angle < 0:
angles = np.linspace(-90, -12, n_tot_angles) angles2 = np.linspace(-90, -12, n_tot_angles)
else: else:
angles = np.linspace(90, 12, n_tot_angles) angles2 = np.linspace(90, 12, n_tot_angles)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
if var2 > var:
angle = angle2
else: else:
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
early_slope_edge=22 early_slope_edge=22
if abs(angle) > early_slope_edge: if abs(angle) > early_slope_edge:
if angle < 0: if angle < 0:
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
else: else:
angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter)
if var2 > var:
angle = angle2
return angle return angle
def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map):
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles)) if map is None:
results = [do_image_rotation.__wrapped__(angle, img=img, sigma_des=sigma_des, logger=logger)
for angle in angles]
else:
with share_ndarray(img) as img_shared:
results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=None),
angles))
if plotter: if plotter:
plotter.save_plot_of_rotation_angle(angles, results) plotter.save_plot_of_rotation_angle(angles, results)
try: try:
var_res = np.array(results) var_res = np.array(results)
assert var_res.any() assert var_res.any()
angle = angles[np.argmax(var_res)] idx = np.argmax(var_res)
angle = angles[idx]
var = var_res[idx]
except: except:
logger.exception("cannot determine best angle among %s", str(angles)) logger.exception("cannot determine best angle among %s", str(angles))
angle = 0 angle = 0
return angle var = 0
return angle, var
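Editor's note (not part of the diff): get_smallest_skew() now returns an (angle, variance) pair, so a caller that widens the search range after a large first estimate can keep whichever range scored best; do_image_rotation() stays reachable in-process via __wrapped__, or through a pool with a shared-memory descriptor. A sequential sketch (img is an illustrative placeholder for a binarized 2D array):

angle, var = get_smallest_skew(img, 1.0, np.linspace(-25, 25, 60), map=None)
if abs(angle) > 22:  # large skew: re-search the outer range
    angles2 = np.linspace(-90, -25, 60) if angle < 0 else np.linspace(90, 25, 60)
    angle2, var2 = get_smallest_skew(img, 1.0, angles2, map=None)
    if var2 > var:   # keep the overall best result across both ranges
        angle = angle2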
def return_deskew_slop_old_mp(img_patch_org, sigma_des,n_tot_angles=100,
main_page=False, logger=None, plotter=None):
if main_page and plotter:
plotter.save_plot_of_textline_density(img_patch_org)
img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
max_shape=np.max(img_int.shape)
img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) ))
onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.)
onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.)
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
angles = np.array([-45, 0, 45, 90,])
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
elif main_page:
angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
early_slope_edge=11
if abs(angle) > early_slope_edge:
if angle < 0:
angles = np.linspace(-90, -12, n_tot_angles)
else:
angles = np.linspace(90, 12, n_tot_angles)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
else:
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
early_slope_edge=22
if abs(angle) > early_slope_edge:
if angle < 0:
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
else:
angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter)
return angle
def do_image_rotation_omp(queue_of_all_params,angles_per_process, img_resized, sigma_des):
vars_per_each_subprocess = []
angles_per_each_subprocess = []
for mv in range(len(angles_per_process)):
img_rot=rotate_image(img_resized,angles_per_process[mv])
img_rot[img_rot!=0]=1
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
except:
var_spectrum=0
vars_per_each_subprocess.append(var_spectrum)
angles_per_each_subprocess.append(angles_per_process[mv])
queue_of_all_params.put([vars_per_each_subprocess, angles_per_each_subprocess])
def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None):
num_cores = cpu_count()
queue_of_all_params = Queue()
processes = []
nh = np.linspace(0, len(angles), num_cores + 1)
for i in range(num_cores):
angles_per_process = angles[int(nh[i]) : int(nh[i + 1])]
processes.append(Process(target=do_image_rotation_omp, args=(queue_of_all_params, angles_per_process, img_resized, sigma_des)))
for i in range(num_cores):
processes[i].start()
var_res=[]
all_angles = []
for i in range(num_cores):
list_all_par = queue_of_all_params.get(True)
vars_for_subprocess = list_all_par[0]
angles_sub_process = list_all_par[1]
for j in range(len(vars_for_subprocess)):
var_res.append(vars_for_subprocess[j])
all_angles.append(angles_sub_process[j])
for i in range(num_cores):
processes[i].join()
if plotter:
plotter.save_plot_of_rotation_angle(all_angles, var_res)
try:
var_res=np.array(var_res)
ang_int=all_angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
return ang_int
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new( def do_work_of_slopes_new(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_mask_tot_ea=None, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
): ):
if KERNEL is None: if KERNEL is None:
@ -1641,7 +1616,7 @@ def do_work_of_slopes_new(
logger.debug('enter do_work_of_slopes_new') logger.debug('enter do_work_of_slopes_new')
x, y, w, h = box_text x, y, w, h = box_text
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated) crop_coor = box2rect(box_text)
mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = np.zeros(textline_mask_tot_ea.shape)
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
all_text_region_raw = textline_mask_tot_ea * mask_textline all_text_region_raw = textline_mask_tot_ea * mask_textline
@ -1649,7 +1624,7 @@ def do_work_of_slopes_new(
img_int_p = all_text_region_raw[:,:] img_int_p = all_text_region_raw[:,:]
img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2)
if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
slope = 0 slope = 0
slope_for_all = slope_deskew slope_for_all = slope_deskew
all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w]
@ -1687,9 +1662,12 @@ def do_work_of_slopes_new(
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
@wrap_ndarray_shared(kw='mask_texts_only')
def do_work_of_slopes_new_curved( def do_work_of_slopes_new_curved(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, textline_mask_tot_ea=None, mask_texts_only=None,
num_col=1, scale_par=1.0, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
): ):
if KERNEL is None: if KERNEL is None:
@ -1706,7 +1684,7 @@ def do_work_of_slopes_new_curved(
# plt.imshow(img_int_p) # plt.imshow(img_int_p)
# plt.show() # plt.show()
if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
slope = 0 slope = 0
slope_for_all = slope_deskew slope_for_all = slope_deskew
else: else:
@ -1732,7 +1710,7 @@ def do_work_of_slopes_new_curved(
slope_for_all = slope_deskew slope_for_all = slope_deskew
slope = slope_for_all slope = slope_for_all
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated) crop_coor = box2rect(box_text)
if abs(slope_for_all) < 45: if abs(slope_for_all) < 45:
textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
@ -1765,20 +1743,25 @@ def do_work_of_slopes_new_curved(
mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4)
pixel_img = 1 pixel_img = 1
mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) mask_biggest2 = resize_image(mask_biggest2,
int(mask_biggest2.shape[0] * scale_par),
int(mask_biggest2.shape[1] * scale_par))
cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img)
try: try:
textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0])
except Exception as why: except Exception as why:
logger.error(why) logger.error(why)
else: else:
textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw,
slope_for_all, contour_par,
box_text, True)
return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new_light( def do_work_of_slopes_new_light(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, textline_mask_tot_ea=None, slope_deskew=0, textline_light=True,
logger=None logger=None
): ):
if logger is None: if logger is None:
@ -1786,7 +1769,7 @@ def do_work_of_slopes_new_light(
logger.debug('enter do_work_of_slopes_new_light') logger.debug('enter do_work_of_slopes_new_light')
x, y, w, h = box_text x, y, w, h = box_text
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated) crop_coor = box2rect(box_text)
mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = np.zeros(textline_mask_tot_ea.shape)
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
all_text_region_raw = textline_mask_tot_ea * mask_textline all_text_region_raw = textline_mask_tot_ea * mask_textline

45
src/eynollah/utils/shm.py Normal file
View file

@ -0,0 +1,45 @@
from multiprocessing import shared_memory
from contextlib import contextmanager
from functools import wraps
import numpy as np
@contextmanager
def share_ndarray(array: np.ndarray):
size = np.dtype(array.dtype).itemsize * np.prod(array.shape)
shm = shared_memory.SharedMemory(create=True, size=size)
try:
shared_array = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
shared_array[:] = array[:]
shared_array.flags["WRITEABLE"] = False
yield dict(shape=array.shape, dtype=array.dtype, name=shm.name)
finally:
shm.close()
shm.unlink()
@contextmanager
def ndarray_shared(array: dict):
shm = shared_memory.SharedMemory(name=array['name'])
try:
array = np.ndarray(array['shape'], dtype=array['dtype'], buffer=shm.buf)
yield array
finally:
shm.close()
def wrap_ndarray_shared(kw=None):
def wrapper(f):
if kw is None:
@wraps(f)
def shared_func(array, *args, **kwargs):
with ndarray_shared(array) as ndarray:
return f(ndarray, *args, **kwargs)
return shared_func
else:
@wraps(f)
def shared_func(*args, **kwargs):
array = kwargs.pop(kw)
with ndarray_shared(array) as ndarray:
kwargs[kw] = ndarray
return f(*args, **kwargs)
return shared_func
return wrapper
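Editor's note (not part of the diff): a minimal parent/worker round trip with the new helpers; column_sum and the toy array are hypothetical, only share_ndarray and wrap_ndarray_shared come from this file:

from concurrent.futures import ProcessPoolExecutor
from functools import partial
import numpy as np

@wrap_ndarray_shared(kw='img')
def column_sum(idx, img=None):
    # in the worker, 'img' arrives as a descriptor dict and is re-attached here
    return float(img[:, idx].sum())

if __name__ == '__main__':
    data = np.arange(12, dtype=float).reshape(3, 4)
    with share_ndarray(data) as shared:      # shared is dict(shape, dtype, name)
        with ProcessPoolExecutor(2) as pool:
            sums = list(pool.map(partial(column_sum, img=shared), range(4)))
    assert sums == [12.0, 15.0, 18.0, 21.0]  # per-column sums of data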

View file

@ -92,6 +92,7 @@ def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(t
return peaks_final return peaks_final
else: else:
return None return None
# Function to fit text inside the given area # Function to fit text inside the given area
def fit_text_single_line(draw, text, font_path, max_width, max_height): def fit_text_single_line(draw, text, font_path, max_width, max_height):
initial_font_size = 50 initial_font_size = 50
@ -369,7 +370,11 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind
return textline_contour return textline_contour
def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False): def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons,
prediction_model,
b_s_ocr, num_to_char,
textline_light=False,
curved_line=False):
max_len = 512 max_len = 512
padding_token = 299 padding_token = 299
image_width = 512#max_len * 4 image_width = 512#max_len * 4
@ -425,17 +430,23 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
if splited_images: if splited_images:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0],
image_height,
image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(1) cropped_lines_meging_indexing.append(1)
img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1],
image_height,
image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(-1) cropped_lines_meging_indexing.append(-1)
else: else:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop,
image_height,
image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(0) cropped_lines_meging_indexing.append(0)
@ -468,7 +479,12 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
pred_texts_ib = pred_texts[ib].replace("[UNK]", "") pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
extracted_texts.append(pred_texts_ib) extracted_texts.append(pred_texts_ib)
extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [extracted_texts[ind]
if cropped_lines_meging_indexing[ind]==0
else extracted_texts[ind]+" "+extracted_texts[ind+1]
if cropped_lines_meging_indexing[ind]==1
else None
for ind in range(len(cropped_lines_meging_indexing))]
extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
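Editor's note (not part of the diff): how the merging indexer above recombines split textlines, with 0 = standalone, 1 = first half (merge with next), -1 = second half (dropped after merging); toy values:

texts = ['foo', 'bar', 'baz']
merge_idx = [0, 1, -1]
merged = [texts[i] if merge_idx[i] == 0
          else texts[i] + " " + texts[i + 1] if merge_idx[i] == 1
          else None
          for i in range(len(merge_idx))]
merged = [t for t in merged if t is not None]
assert merged == ['foo', 'bar baz']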

View file

@ -289,7 +289,7 @@ class EynollahXmlWriter():
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
for mm in range(len(found_polygons_text_region_h)): for mm in range(len(found_polygons_text_region_h)):
textregion = TextRegionType(id=counter.next_region_id, type_='header', textregion = TextRegionType(id=counter.next_region_id, type_='heading',
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
page.add_TextRegion(textregion) page.add_TextRegion(textregion)
@ -335,7 +335,7 @@ class EynollahXmlWriter():
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
for mm in range(len(polygons_lines_to_be_written_in_xml)): for mm in range(len(polygons_lines_to_be_written_in_xml)):
page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
for mm in range(len(found_polygons_tables)): for mm in range(len(found_polygons_tables)):
page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))

View file

@ -20,23 +20,9 @@ MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve())) MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve())) MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') "options",
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' [
args = [
'-m', MODELS_LAYOUT,
'-i', str(infile),
'-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO)
def only_eynollah(logrec):
return logrec.name == 'eynollah'
runner = CliRunner()
for options in [
[], # defaults [], # defaults
["--allow_scaling", "--curved-line"], ["--allow_scaling", "--curved-line"],
["--allow_scaling", "--curved-line", "--full-layout"], ["--allow_scaling", "--curved-line", "--full-layout"],
@ -47,22 +33,34 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
# -eoi ... # -eoi ...
# --do_ocr # --do_ocr
# --skip_layout_and_reading_order # --skip_layout_and_reading_order
]: ], ids=str)
with subtests.test(#msg="test CLI", def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
options=options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
with caplog.filtering(only_eynollah): outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
result = runner.invoke(layout_cli, args + options, catch_exceptions=False) args = [
assert result.exit_code == 0, result.stdout '-m', MODELS_LAYOUT,
logmsgs = [logrec.message for logrec in caplog.records] '-i', str(infile),
assert str(infile) in logmsgs '-o', str(outfile.parent),
assert outfile.exists() ]
tree = page_from_file(str(outfile)).etree if pytestconfig.getoption('verbose') > 0:
regions = tree.xpath("//page:TextRegion", namespaces=NS) args.extend(['-l', 'DEBUG'])
assert len(regions) >= 2, "result is inaccurate" caplog.set_level(logging.INFO)
regions = tree.xpath("//page:SeparatorRegion", namespaces=NS) def only_eynollah(logrec):
assert len(regions) >= 2, "result is inaccurate" return logrec.name == 'eynollah'
lines = tree.xpath("//page:TextLine", namespaces=NS) runner = CliRunner()
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line with caplog.filtering(only_eynollah):
result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
logmsgs = [logrec.message for logrec in caplog.records]
assert str(infile) in logmsgs
assert outfile.exists()
tree = page_from_file(str(outfile)).etree
regions = tree.xpath("//page:TextRegion", namespaces=NS)
assert len(regions) >= 2, "result is inaccurate"
regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
assert len(regions) >= 2, "result is inaccurate"
lines = tree.xpath("//page:TextLine", namespaces=NS)
assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
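Editor's note (not part of the diff): the shape of the parametrize conversion above, reduced to a toy case; each option list becomes an isolated test invocation, and ids=str gives it a readable label:

import pytest

@pytest.mark.parametrize("options", [[], ["--flag"]], ids=str)
def test_demo(options):
    assert isinstance(options, list)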
def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
@ -86,7 +84,13 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in')) assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in'))
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
"options",
[
[], # defaults
["--no-patches"],
], ids=str)
def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
args = [ args = [
@ -100,25 +104,19 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
def only_eynollah(logrec): def only_eynollah(logrec):
return logrec.name == 'SbbBinarizer' return logrec.name == 'SbbBinarizer'
runner = CliRunner() runner = CliRunner()
for options in [ with caplog.filtering(only_eynollah):
[], # defaults result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
["--no-patches"], assert result.exit_code == 0, result.stdout
]: logmsgs = [logrec.message for logrec in caplog.records]
with subtests.test(#msg="test CLI", assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
options=options): assert outfile.exists()
with caplog.filtering(only_eynollah): with Image.open(infile) as original_img:
result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) original_size = original_img.size
assert result.exit_code == 0, result.stdout with Image.open(outfile) as binarized_img:
logmsgs = [logrec.message for logrec in caplog.records] binarized_size = binarized_img.size
assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) assert original_size == binarized_size
assert outfile.exists()
with Image.open(infile) as original_img:
original_size = original_img.size
with Image.open(outfile) as binarized_img:
binarized_size = binarized_img.size
assert original_size == binarized_size
def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog): def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
outdir = tmp_path outdir = tmp_path
args = [ args = [
@ -139,15 +137,19 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
"options",
[
[], # defaults
["-sos"],
], ids=str)
def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
args = [ args = [
'-m', MODELS_LAYOUT, '-m', MODELS_LAYOUT,
'-i', str(infile), '-i', str(infile),
'-o', str(outfile.parent), '-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
] ]
if pytestconfig.getoption('verbose') > 0: if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG']) args.extend(['-l', 'DEBUG'])
@@ -155,25 +157,19 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog):
     def only_eynollah(logrec):
         return logrec.name == 'enhancement'
     runner = CliRunner()
-    for options in [
-        [], # defaults
-        ["-sos"],
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in caplog.records]
-            assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
-            assert outfile.exists()
-            with Image.open(infile) as original_img:
-                original_size = original_img.size
-            with Image.open(outfile) as enhanced_img:
-                enhanced_size = enhanced_img.size
-            assert (original_size == enhanced_size) == ("-sos" in options)
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
+    assert outfile.exists()
+    with Image.open(infile) as original_img:
+        original_size = original_img.size
+    with Image.open(outfile) as enhanced_img:
+        enhanced_size = enhanced_img.size
+    assert (original_size == enhanced_size) == ("-sos" in options)
 
-def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
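Note the shape of the closing assertion in the filename test above: `assert boolA == boolB` encodes "exactly when", i.e. the enhanced image keeps the original dimensions if and only if `-sos` was passed. A tiny self-contained illustration of the idiom:

```python
def check_size(original_size, enhanced_size, options):
    # size must be preserved exactly when -sos was requested
    assert (original_size == enhanced_size) == ("-sos" in options)

check_size((1000, 1600), (1000, 1600), ["-sos"])  # preserved with flag: ok
check_size((1000, 1600), (2000, 3200), [])        # changed without flag: ok
```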
@@ -194,7 +190,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
     assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2
     assert len(list(outdir.iterdir())) == 2
 
-def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
     args = [
@@ -223,7 +219,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
     #assert in_order != out_order
     assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']
 
-def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
@@ -245,7 +241,15 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
     #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
     assert len(list(outdir.iterdir())) == 2
 
-def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
+@pytest.mark.parametrize(
+    "options",
+    [
+        [], # defaults
+        ["-doit", #str(outrenderfile.parent)],
+        ],
+        ["-trocr"],
+    ], ids=str)
+def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options):
     infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
     outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
     outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
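A subtle detail in the new parametrize list: in `["-doit", #str(outrenderfile.parent)],` everything from `#` on is a comment, including the trailing `],`, so the entry is really `["-doit",` and is only closed by the bare `],` on the next line. That is intentional: `outrenderfile` derives from the per-test `tmp_path` fixture, which does not exist at decorator (collection) time, so the path argument is spliced in at runtime instead (the `options.insert(...)` visible two hunks below). A sketch of the technique:

```python
import pytest

@pytest.mark.parametrize("options", [
    [],         # defaults
    ["-doit"],  # render dir cannot be named here: tmp_path exists only per item
], ids=str)
def test_sketch(tmp_path, options):
    if "-doit" in options:
        # splice the fixture-dependent argument in at runtime
        options.insert(options.index("-doit") + 1, str(tmp_path / "render"))
    assert options in ([], ["-doit", str(tmp_path / "render")])
```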
@@ -255,8 +259,6 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
         '-i', str(infile),
         '-dx', str(infile.parent),
         '-o', str(outfile.parent),
-        # subtests write to same location
-        '--overwrite',
     ]
     if pytestconfig.getoption('verbose') > 0:
         args.extend(['-l', 'DEBUG'])
@@ -264,33 +266,25 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
     def only_eynollah(logrec):
         return logrec.name == 'eynollah'
     runner = CliRunner()
-    for options in [
-        # kba Fri Sep 26 12:53:49 CEST 2025
-        # Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
-        # [], # defaults
-        # ["-doit", str(outrenderfile.parent)],
-        ["-trocr"],
-    ]:
-        with subtests.test(#msg="test CLI",
-                           options=options):
-            with caplog.filtering(only_eynollah):
-                result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
-            assert result.exit_code == 0, result.stdout
-            logmsgs = [logrec.message for logrec in caplog.records]
-            # FIXME: ocr has no logging!
-            #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
-            assert outfile.exists()
-            if "-doit" in options:
-                assert outrenderfile.exists()
-            #in_tree = page_from_file(str(infile)).etree
-            #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
-            out_tree = page_from_file(str(outfile)).etree
-            out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
-            assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
-            assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
+    if "-doit" in options:
+        options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    # FIXME: ocr has no logging!
+    #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
+    assert outfile.exists()
+    if "-doit" in options:
+        assert outrenderfile.exists()
+    #in_tree = page_from_file(str(infile)).etree
+    #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
+    out_tree = page_from_file(str(outfile)).etree
+    out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
+    assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
+    assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
 
-@pytest.mark.skip("Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged")
-def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
+def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
     args = [
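The trailing assertions of the filename test query the PAGE-XML output: `page:TextEquiv[last()]` picks each line's most recently appended text result, so pre-existing TextEquiv entries in the input do not mask the new OCR text, and the `>= 2` lines / `> 100` characters thresholds are a coarse sanity check against empty or truncated output. A self-contained sketch of the same XPath (assuming the 2019-07-15 PAGE namespace used by OCR-D):

```python
from lxml import etree

NS = {"page": "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"}
xml = b"""<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15">
  <Page><TextRegion><TextLine>
    <TextEquiv><Unicode>previous result</Unicode></TextEquiv>
    <TextEquiv><Unicode>new OCR result</Unicode></TextEquiv>
  </TextLine></TextRegion></Page>
</PcGts>"""
tree = etree.fromstring(xml)
out_texts = tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()",
                       namespaces=NS)
assert out_texts == ["new OCR result"]  # last() skips the older entry
```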