Merge pull request #1 from bertsky/v3-api-refactor-init

refactoring of Eynollah init and model loading
pull/148/head
Robert Sachunsky 1 week ago committed by GitHub
commit 1a0b9d1958
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -77,9 +77,14 @@ deps-test: models_eynollah
smoke-test: TMPDIR != mktemp -d
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
# layout analysis:
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
# directory mode (skip one, add one):
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
# binarize:
eynollah binarization -m $(CURDIR)/default-2021-03-09 $< $(TMPDIR)/$(<F)
test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"

@ -83,23 +83,28 @@ If no option is set, the tool performs layout detection of main regions (backgro
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.
#### Use as OCR-D processor
🚧 **Work in progress**
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor.
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli).
In this case, the source image file group with (preferably) RGB images should be used as input like this:
```
ocrd-eynollah-segment -I OCR-D-IMG -O SEG-LINE -P models
```
Any image referenced by `@imageFilename` in PAGE-XML is passed on directly to Eynollah as a processor, so that e.g.
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models 2022-04-05
```
ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
```
uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps.
If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
- existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
- existing annotation (and respective `AlternativeImage`s) are partially _ignored_:
- previous page frame detection (`cropped` images)
- previous derotation (`deskewed` images)
- previous thresholding (`binarized` images)
- if the page-level image nevertheless deviates from the original (`@imageFilename`)
(because some other preprocessing step was in effect like `denoised`), then
the output PAGE-XML will be based on that as new top-level (`@imageFilename`)
ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models 2022-04-05
Still, in general, it makes more sense to add other workflow steps **after** Eynollah.
#### Additional documentation
Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki).

@ -256,26 +256,37 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
if log_level:
getLogger('eynollah').setLevel(getLevelName(log_level))
if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep")
sys.exit(1)
raise ValueError("Plotting with -sl, -sd, -sa, -sp, -si or -ae also requires -ep")
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -sp, -si or -ae")
sys.exit(1)
raise ValueError("Plotting with -ep also requires -sl, -sd, -sa, -sp, -si or -ae")
if textline_light and not light_version:
print('Error: You used -tll to enable light textline detection but -light is not enabled')
sys.exit(1)
raise ValueError("Light textline detection with -tll also requires -light")
if light_version and not textline_light:
print('Error: You used -light without -tll. Light version need light textline to be enabled.')
if extract_only_images and (allow_enhancement or allow_scaling or light_version or curved_line or textline_light or full_layout or tables or right2left or headers_off) :
print('Error: You used -eoi which can not be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae or curved_line -cl or textline_light -tll or full_layout -fl or tables -tab or right2left -r2l or headers_off -ho')
sys.exit(1)
raise ValueError("Light version with -light also requires light textline detection -tll")
if extract_only_images and allow_enhancement:
raise ValueError("Image extraction with -eoi can not be enabled alongside allow_enhancement -ae")
if extract_only_images and allow_scaling:
raise ValueError("Image extraction with -eoi can not be enabled alongside allow_scaling -as")
if extract_only_images and light_version:
raise ValueError("Image extraction with -eoi can not be enabled alongside light_version -light")
if extract_only_images and curved_line:
raise ValueError("Image extraction with -eoi can not be enabled alongside curved_line -cl")
if extract_only_images and textline_light:
raise ValueError("Image extraction with -eoi can not be enabled alongside textline_light -tll")
if extract_only_images and full_layout:
raise ValueError("Image extraction with -eoi can not be enabled alongside full_layout -fl")
if extract_only_images and tables:
raise ValueError("Image extraction with -eoi can not be enabled alongside tables -tab")
if extract_only_images and right2left:
raise ValueError("Image extraction with -eoi can not be enabled alongside right2left -r2l")
if extract_only_images and headers_off:
raise ValueError("Image extraction with -eoi can not be enabled alongside headers_off -ho")
if image is None and dir_in is None:
raise ValueError("Either a single image -i or a dir_in -di is required")
eynollah = Eynollah(
model,
logger=getLogger('Eynollah'),
image_filename=image,
overwrite=overwrite,
logger=getLogger('eynollah'),
dir_out=out,
dir_in=dir_in,
dir_of_cropped_images=save_images,
extract_only_images=extract_only_images,
dir_of_layout=save_layout,
@ -301,10 +312,9 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
skip_layout_and_reading_order=skip_layout_and_reading_order,
)
if dir_in:
eynollah.run()
eynollah.run(dir_in=dir_in, overwrite=overwrite)
else:
pcgts = eynollah.run()
eynollah.writer.write_pagexml(pcgts)
eynollah.run(image_filename=image, overwrite=overwrite)
@main.command()

File diff suppressed because it is too large Load Diff

@ -2,7 +2,7 @@ from typing import Optional
from ocrd_models import OcrdPage
from ocrd import Processor, OcrdPageResult
from .eynollah import Eynollah
from .eynollah import Eynollah, EynollahXmlWriter
class EynollahProcessor(Processor):
# already employs background CPU multiprocessing per page
@ -14,11 +14,28 @@ class EynollahProcessor(Processor):
return 'ocrd-eynollah-segment'
def setup(self) -> None:
# for caching models
self.models = None
if self.parameter['textline_light'] and not self.parameter['light_version']:
raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection, "
"but parameter 'light_version' is not enabled")
self.eynollah = Eynollah(
self.resolve_resource(self.parameter['models']),
logger=self.logger,
allow_enhancement=self.parameter['allow_enhancement'],
curved_line=self.parameter['curved_line'],
right2left=self.parameter['right_to_left'],
ignore_page_extraction=self.parameter['ignore_page_extraction'],
light_version=self.parameter['light_version'],
textline_light=self.parameter['textline_light'],
full_layout=self.parameter['full_layout'],
allow_scaling=self.parameter['allow_scaling'],
headers_off=self.parameter['headers_off'],
tables=self.parameter['tables'],
)
self.eynollah.plotter = None
def shutdown(self):
if hasattr(self, 'eynollah'):
del self.eynollah
def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
"""
@ -60,27 +77,15 @@ class EynollahProcessor(Processor):
image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file
result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original
# FIXME: mask out already existing regions (incremental segmentation)
eynollah = Eynollah(
self.resolve_resource(self.parameter['models']),
logger=self.logger,
allow_enhancement=self.parameter['allow_enhancement'],
curved_line=self.parameter['curved_line'],
right2left=self.parameter['right_to_left'],
ignore_page_extraction=self.parameter['ignore_page_extraction'],
light_version=self.parameter['light_version'],
textline_light=self.parameter['textline_light'],
full_layout=self.parameter['full_layout'],
allow_scaling=self.parameter['allow_scaling'],
headers_off=self.parameter['headers_off'],
tables=self.parameter['tables'],
override_dpi=self.parameter['dpi'],
pcgts=pcgts,
image_filename=image_filename,
image_pil=page_image
self.eynollah.cache_images(
image_pil=page_image,
dpi=self.parameter['dpi'],
)
if self.models is not None:
# reuse loaded models from previous page
eynollah.models = self.models
eynollah.run()
self.models = eynollah.models
self.eynollah.writer = EynollahXmlWriter(
dir_out=None,
image_filename=image_filename,
curved_line=self.eynollah.curved_line,
textline_light=self.eynollah.textline_light,
pcgts=pcgts)
self.eynollah.run_single()
return result

@ -4,24 +4,18 @@ Tool to load model and binarize a given image.
import sys
from glob import glob
from os import environ, devnull
from os.path import join
from warnings import catch_warnings, simplefilter
import os
import logging
import numpy as np
from PIL import Image
import cv2
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
stderr = sys.stderr
sys.stderr = open(devnull, 'w')
from ocrd_utils import tf_disable_interactive_logs
tf_disable_interactive_logs()
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.python.keras import backend as tensorflow_backend
sys.stderr = stderr
import logging
def resize_image(img_in, input_height, input_width):
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
@ -53,7 +47,7 @@ class SbbBinarizer:
del self.session
def load_model(self, model_name):
model = load_model(join(self.model_dir, model_name), compile=False)
model = load_model(os.path.join(self.model_dir, model_name), compile=False)
model_height = model.layers[len(model.layers)-1].output_shape[1]
model_width = model.layers[len(model.layers)-1].output_shape[2]
n_classes = model.layers[len(model.layers)-1].output_shape[3]

@ -247,7 +247,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map):
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
#cnts = cnts/2
cnts = [(i/6).astype(np.int) for i in cnts]
cnts = [(i/6).astype(int) for i in cnts]
results = map(partial(do_back_rotation_and_get_cnt_back,
img=img,
slope_first=slope_first,

@ -1,3 +1,4 @@
from contextlib import nullcontext
from PIL import Image
import numpy as np
from ocrd_models import OcrdExif
@ -17,12 +18,13 @@ def pil2cv(img):
def check_dpi(img):
try:
if isinstance(img, Image.Image):
pil_image = img
pil_image = nullcontext(img)
elif isinstance(img, str):
pil_image = Image.open(img)
else:
pil_image = cv2pil(img)
exif = OcrdExif(pil_image)
pil_image = nullcontext(cv2pil(img))
with pil_image:
exif = OcrdExif(pil_image)
resolution = exif.resolution
if resolution == 1:
raise Exception()

@ -1616,7 +1616,7 @@ def do_work_of_slopes_new(
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
hierarchy,
max_area=1, min_area=0.00008)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
if np.isnan(y_diff_mean):
slope_for_all = MAX_SLOPE
else:
@ -1681,7 +1681,7 @@ def do_work_of_slopes_new_curved(
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
hierarchy,
max_area=1, min_area=0.0008)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
if np.isnan(y_diff_mean):
slope_for_all = MAX_SLOPE
else:

Loading…
Cancel
Save