mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-15 15:19:55 +02:00
Merge pull request #1 from bertsky/v3-api-refactor-init
refactoring of Eynollah init and model loading
This commit is contained in:
commit
1a0b9d1958
10 changed files with 690 additions and 826 deletions
5
Makefile
5
Makefile
|
@ -77,9 +77,14 @@ deps-test: models_eynollah
|
|||
|
||||
smoke-test: TMPDIR != mktemp -d
|
||||
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
||||
# layout analysis:
|
||||
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
||||
# directory mode (skip one, add one):
|
||||
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
||||
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
|
||||
# binarize:
|
||||
eynollah binarization -m $(CURDIR)/default-2021-03-09 $< $(TMPDIR)/$(<F)
|
||||
test -s $(TMPDIR)/$(<F)
|
||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||
|
|
29
README.md
29
README.md
|
@ -83,23 +83,28 @@ If no option is set, the tool performs layout detection of main regions (backgro
|
|||
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.
|
||||
|
||||
#### Use as OCR-D processor
|
||||
🚧 **Work in progress**
|
||||
|
||||
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor.
|
||||
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli).
|
||||
|
||||
In this case, the source image file group with (preferably) RGB images should be used as input like this:
|
||||
|
||||
```
|
||||
ocrd-eynollah-segment -I OCR-D-IMG -O SEG-LINE -P models
|
||||
```
|
||||
|
||||
Any image referenced by `@imageFilename` in PAGE-XML is passed on directly to Eynollah as a processor, so that e.g.
|
||||
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models 2022-04-05
|
||||
|
||||
```
|
||||
ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
|
||||
```
|
||||
|
||||
uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps
|
||||
|
||||
If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
|
||||
- existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
|
||||
- existing annotations (and respective `AlternativeImage`s) are partially _ignored_:
|
||||
- previous page frame detection (`cropped` images)
|
||||
- previous derotation (`deskewed` images)
|
||||
- previous thresholding (`binarized` images)
|
||||
- if the page-level image nevertheless deviates from the original (`@imageFilename`)
|
||||
(because some other preprocessing step was in effect like `denoised`), then
|
||||
the output PAGE-XML will be based on that as new top-level (`@imageFilename`)
|
||||
|
||||
|
||||
ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models 2022-04-05
|
||||
|
||||
Still, in general, it makes more sense to add other workflow steps **after** Eynollah.
|
||||
|
||||
#### Additional documentation
|
||||
Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki).
|
||||
|
|
|
@ -256,26 +256,37 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
|||
if log_level:
|
||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||
if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
|
||||
print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep")
|
||||
sys.exit(1)
|
||||
raise ValueError("Plotting with -sl, -sd, -sa, -sp, -si or -ae also requires -ep")
|
||||
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
|
||||
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -sp, -si or -ae")
|
||||
sys.exit(1)
|
||||
raise ValueError("Plotting with -ep also requires -sl, -sd, -sa, -sp, -si or -ae")
|
||||
if textline_light and not light_version:
|
||||
print('Error: You used -tll to enable light textline detection but -light is not enabled')
|
||||
sys.exit(1)
|
||||
raise ValueError("Light textline detection with -tll also requires -light")
|
||||
if light_version and not textline_light:
|
||||
print('Error: You used -light without -tll. Light version need light textline to be enabled.')
|
||||
if extract_only_images and (allow_enhancement or allow_scaling or light_version or curved_line or textline_light or full_layout or tables or right2left or headers_off) :
|
||||
print('Error: You used -eoi which can not be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae or curved_line -cl or textline_light -tll or full_layout -fl or tables -tab or right2left -r2l or headers_off -ho')
|
||||
sys.exit(1)
|
||||
raise ValueError("Light version with -light also requires light textline detection -tll")
|
||||
if extract_only_images and allow_enhancement:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside allow_enhancement -ae")
|
||||
if extract_only_images and allow_scaling:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside allow_scaling -as")
|
||||
if extract_only_images and light_version:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside light_version -light")
|
||||
if extract_only_images and curved_line:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside curved_line -cl")
|
||||
if extract_only_images and textline_light:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside textline_light -tll")
|
||||
if extract_only_images and full_layout:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside full_layout -fl")
|
||||
if extract_only_images and tables:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside tables -tab")
|
||||
if extract_only_images and right2left:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside right2left -r2l")
|
||||
if extract_only_images and headers_off:
|
||||
raise ValueError("Image extraction with -eoi can not be enabled alongside headers_off -ho")
|
||||
if image is None and dir_in is None:
|
||||
raise ValueError("Either a single image -i or a dir_in -di is required")
|
||||
eynollah = Eynollah(
|
||||
model,
|
||||
logger=getLogger('Eynollah'),
|
||||
image_filename=image,
|
||||
overwrite=overwrite,
|
||||
logger=getLogger('eynollah'),
|
||||
dir_out=out,
|
||||
dir_in=dir_in,
|
||||
dir_of_cropped_images=save_images,
|
||||
extract_only_images=extract_only_images,
|
||||
dir_of_layout=save_layout,
|
||||
|
@ -301,10 +312,9 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
|||
skip_layout_and_reading_order=skip_layout_and_reading_order,
|
||||
)
|
||||
if dir_in:
|
||||
eynollah.run()
|
||||
eynollah.run(dir_in=dir_in, overwrite=overwrite)
|
||||
else:
|
||||
pcgts = eynollah.run()
|
||||
eynollah.writer.write_pagexml(pcgts)
|
||||
eynollah.run(image_filename=image, overwrite=overwrite)
|
||||
|
||||
|
||||
@main.command()
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,7 +2,7 @@ from typing import Optional
|
|||
from ocrd_models import OcrdPage
|
||||
from ocrd import Processor, OcrdPageResult
|
||||
|
||||
from .eynollah import Eynollah
|
||||
from .eynollah import Eynollah, EynollahXmlWriter
|
||||
|
||||
class EynollahProcessor(Processor):
|
||||
# already employs background CPU multiprocessing per page
|
||||
|
@ -14,11 +14,28 @@ class EynollahProcessor(Processor):
|
|||
return 'ocrd-eynollah-segment'
|
||||
|
||||
def setup(self) -> None:
|
||||
# for caching models
|
||||
self.models = None
|
||||
if self.parameter['textline_light'] and not self.parameter['light_version']:
|
||||
raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection, "
|
||||
"but parameter 'light_version' is not enabled")
|
||||
self.eynollah = Eynollah(
|
||||
self.resolve_resource(self.parameter['models']),
|
||||
logger=self.logger,
|
||||
allow_enhancement=self.parameter['allow_enhancement'],
|
||||
curved_line=self.parameter['curved_line'],
|
||||
right2left=self.parameter['right_to_left'],
|
||||
ignore_page_extraction=self.parameter['ignore_page_extraction'],
|
||||
light_version=self.parameter['light_version'],
|
||||
textline_light=self.parameter['textline_light'],
|
||||
full_layout=self.parameter['full_layout'],
|
||||
allow_scaling=self.parameter['allow_scaling'],
|
||||
headers_off=self.parameter['headers_off'],
|
||||
tables=self.parameter['tables'],
|
||||
)
|
||||
self.eynollah.plotter = None
|
||||
|
||||
def shutdown(self):
|
||||
if hasattr(self, 'eynollah'):
|
||||
del self.eynollah
|
||||
|
||||
def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
|
||||
"""
|
||||
|
@ -60,27 +77,15 @@ class EynollahProcessor(Processor):
|
|||
image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file
|
||||
result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original
|
||||
# FIXME: mask out already existing regions (incremental segmentation)
|
||||
eynollah = Eynollah(
|
||||
self.resolve_resource(self.parameter['models']),
|
||||
logger=self.logger,
|
||||
allow_enhancement=self.parameter['allow_enhancement'],
|
||||
curved_line=self.parameter['curved_line'],
|
||||
right2left=self.parameter['right_to_left'],
|
||||
ignore_page_extraction=self.parameter['ignore_page_extraction'],
|
||||
light_version=self.parameter['light_version'],
|
||||
textline_light=self.parameter['textline_light'],
|
||||
full_layout=self.parameter['full_layout'],
|
||||
allow_scaling=self.parameter['allow_scaling'],
|
||||
headers_off=self.parameter['headers_off'],
|
||||
tables=self.parameter['tables'],
|
||||
override_dpi=self.parameter['dpi'],
|
||||
pcgts=pcgts,
|
||||
image_filename=image_filename,
|
||||
image_pil=page_image
|
||||
self.eynollah.cache_images(
|
||||
image_pil=page_image,
|
||||
dpi=self.parameter['dpi'],
|
||||
)
|
||||
if self.models is not None:
|
||||
# reuse loaded models from previous page
|
||||
eynollah.models = self.models
|
||||
eynollah.run()
|
||||
self.models = eynollah.models
|
||||
self.eynollah.writer = EynollahXmlWriter(
|
||||
dir_out=None,
|
||||
image_filename=image_filename,
|
||||
curved_line=self.eynollah.curved_line,
|
||||
textline_light=self.eynollah.textline_light,
|
||||
pcgts=pcgts)
|
||||
self.eynollah.run_single()
|
||||
return result
|
||||
|
|
|
@ -4,25 +4,19 @@ Tool to load model and binarize a given image.
|
|||
|
||||
import sys
|
||||
from glob import glob
|
||||
from os import environ, devnull
|
||||
from os.path import join
|
||||
from warnings import catch_warnings, simplefilter
|
||||
import os
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import cv2
|
||||
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
stderr = sys.stderr
|
||||
sys.stderr = open(devnull, 'w')
|
||||
from ocrd_utils import tf_disable_interactive_logs
|
||||
tf_disable_interactive_logs()
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import load_model
|
||||
from tensorflow.python.keras import backend as tensorflow_backend
|
||||
sys.stderr = stderr
|
||||
|
||||
|
||||
import logging
|
||||
|
||||
def resize_image(img_in, input_height, input_width):
|
||||
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
|
@ -53,7 +47,7 @@ class SbbBinarizer:
|
|||
del self.session
|
||||
|
||||
def load_model(self, model_name):
|
||||
model = load_model(join(self.model_dir, model_name), compile=False)
|
||||
model = load_model(os.path.join(self.model_dir, model_name), compile=False)
|
||||
model_height = model.layers[len(model.layers)-1].output_shape[1]
|
||||
model_width = model.layers[len(model.layers)-1].output_shape[2]
|
||||
n_classes = model.layers[len(model.layers)-1].output_shape[3]
|
||||
|
|
|
@ -247,7 +247,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map):
|
|||
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
|
||||
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
|
||||
#cnts = cnts/2
|
||||
cnts = [(i/6).astype(np.int) for i in cnts]
|
||||
cnts = [(i/6).astype(int) for i in cnts]
|
||||
results = map(partial(do_back_rotation_and_get_cnt_back,
|
||||
img=img,
|
||||
slope_first=slope_first,
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
from contextlib import nullcontext
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
from ocrd_models import OcrdExif
|
||||
|
@ -17,12 +18,13 @@ def pil2cv(img):
|
|||
def check_dpi(img):
|
||||
try:
|
||||
if isinstance(img, Image.Image):
|
||||
pil_image = img
|
||||
pil_image = nullcontext(img)
|
||||
elif isinstance(img, str):
|
||||
pil_image = Image.open(img)
|
||||
else:
|
||||
pil_image = cv2pil(img)
|
||||
exif = OcrdExif(pil_image)
|
||||
pil_image = nullcontext(cv2pil(img))
|
||||
with pil_image:
|
||||
exif = OcrdExif(pil_image)
|
||||
resolution = exif.resolution
|
||||
if resolution == 1:
|
||||
raise Exception()
|
||||
|
|
|
@ -1616,7 +1616,7 @@ def do_work_of_slopes_new(
|
|||
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
|
||||
hierarchy,
|
||||
max_area=1, min_area=0.00008)
|
||||
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
|
||||
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
|
||||
if np.isnan(y_diff_mean):
|
||||
slope_for_all = MAX_SLOPE
|
||||
else:
|
||||
|
@ -1681,7 +1681,7 @@ def do_work_of_slopes_new_curved(
|
|||
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
|
||||
hierarchy,
|
||||
max_area=1, min_area=0.0008)
|
||||
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
|
||||
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
|
||||
if np.isnan(y_diff_mean):
|
||||
slope_for_all = MAX_SLOPE
|
||||
else:
|
||||
|
|
BIN
tests/resources/euler_rechenkunst01_1738_0025.tif
Normal file
BIN
tests/resources/euler_rechenkunst01_1738_0025.tif
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue