mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-15 07:09:55 +02:00
Merge pull request #1 from bertsky/v3-api-refactor-init
refactoring of Eynollah init and model loading
This commit is contained in:
commit
1a0b9d1958
10 changed files with 690 additions and 826 deletions
5
Makefile
5
Makefile
|
@ -77,9 +77,14 @@ deps-test: models_eynollah
|
||||||
|
|
||||||
smoke-test: TMPDIR != mktemp -d
|
smoke-test: TMPDIR != mktemp -d
|
||||||
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
||||||
|
# layout analysis:
|
||||||
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
||||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||||
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
||||||
|
# directory mode (skip one, add one):
|
||||||
|
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
||||||
|
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
|
||||||
|
# binarize:
|
||||||
eynollah binarization -m $(CURDIR)/default-2021-03-09 $< $(TMPDIR)/$(<F)
|
eynollah binarization -m $(CURDIR)/default-2021-03-09 $< $(TMPDIR)/$(<F)
|
||||||
test -s $(TMPDIR)/$(<F)
|
test -s $(TMPDIR)/$(<F)
|
||||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||||
|
|
29
README.md
29
README.md
|
@ -83,23 +83,28 @@ If no option is set, the tool performs layout detection of main regions (backgro
|
||||||
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.
|
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.
|
||||||
|
|
||||||
#### Use as OCR-D processor
|
#### Use as OCR-D processor
|
||||||
🚧 **Work in progress**
|
|
||||||
|
|
||||||
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor.
|
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli).
|
||||||
|
|
||||||
In this case, the source image file group with (preferably) RGB images should be used as input like this:
|
In this case, the source image file group with (preferably) RGB images should be used as input like this:
|
||||||
|
|
||||||
```
|
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models 2022-04-05
|
||||||
ocrd-eynollah-segment -I OCR-D-IMG -O SEG-LINE -P models
|
|
||||||
```
|
|
||||||
|
|
||||||
Any image referenced by `@imageFilename` in PAGE-XML is passed on directly to Eynollah as a processor, so that e.g.
|
|
||||||
|
|
||||||
```
|
|
||||||
ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
|
If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
|
||||||
```
|
- existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
|
||||||
|
- existing annotations (and respective `AlternativeImage`s) are partially _ignored_:
|
||||||
uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps
|
- previous page frame detection (`cropped` images)
|
||||||
|
- previous derotation (`deskewed` images)
|
||||||
|
- previous thresholding (`binarized` images)
|
||||||
|
- if the page-level image nevertheless deviates from the original (`@imageFilename`)
|
||||||
|
(because some other preprocessing step was in effect like `denoised`), then
|
||||||
|
the output PAGE-XML will be based on that as new top-level (`@imageFilename`)
|
||||||
|
|
||||||
|
|
||||||
|
ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models 2022-04-05
|
||||||
|
|
||||||
|
Still, in general, it makes more sense to add other workflow steps **after** Eynollah.
|
||||||
|
|
||||||
#### Additional documentation
|
#### Additional documentation
|
||||||
Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki).
|
Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki).
|
||||||
|
|
|
@ -256,26 +256,37 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
||||||
if log_level:
|
if log_level:
|
||||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||||
if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
|
if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
|
||||||
print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep")
|
raise ValueError("Plotting with -sl, -sd, -sa, -sp, -si or -ae also requires -ep")
|
||||||
sys.exit(1)
|
|
||||||
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
|
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
|
||||||
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -sp, -si or -ae")
|
raise ValueError("Plotting with -ep also requires -sl, -sd, -sa, -sp, -si or -ae")
|
||||||
sys.exit(1)
|
|
||||||
if textline_light and not light_version:
|
if textline_light and not light_version:
|
||||||
print('Error: You used -tll to enable light textline detection but -light is not enabled')
|
raise ValueError("Light textline detection with -tll also requires -light")
|
||||||
sys.exit(1)
|
|
||||||
if light_version and not textline_light:
|
if light_version and not textline_light:
|
||||||
print('Error: You used -light without -tll. Light version need light textline to be enabled.')
|
raise ValueError("Light version with -light also requires light textline detection -tll")
|
||||||
if extract_only_images and (allow_enhancement or allow_scaling or light_version or curved_line or textline_light or full_layout or tables or right2left or headers_off) :
|
if extract_only_images and allow_enhancement:
|
||||||
print('Error: You used -eoi which can not be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae or curved_line -cl or textline_light -tll or full_layout -fl or tables -tab or right2left -r2l or headers_off -ho')
|
raise ValueError("Image extraction with -eoi can not be enabled alongside allow_enhancement -ae")
|
||||||
sys.exit(1)
|
if extract_only_images and allow_scaling:
|
||||||
|
raise ValueError("Image extraction with -eoi can not be enabled alongside allow_scaling -as")
|
||||||
|
if extract_only_images and light_version:
|
||||||
|
raise ValueError("Image extraction with -eoi can not be enabled alongside light_version -light")
|
||||||
|
if extract_only_images and curved_line:
|
||||||
|
raise ValueError("Image extraction with -eoi can not be enabled alongside curved_line -cl")
|
||||||
|
if extract_only_images and textline_light:
|
||||||
|
raise ValueError("Image extraction with -eoi can not be enabled alongside textline_light -tll")
|
||||||
|
if extract_only_images and full_layout:
|
||||||
|
raise ValueError("Image extraction with -eoi can not be enabled alongside full_layout -fl")
|
||||||
|
if extract_only_images and tables:
|
||||||
|
raise ValueError("Image extraction with -eoi can not be enabled alongside tables -tab")
|
||||||
|
if extract_only_images and right2left:
|
||||||
|
raise ValueError("Image extraction with -eoi can not be enabled alongside right2left -r2l")
|
||||||
|
if extract_only_images and headers_off:
|
||||||
|
raise ValueError("Image extraction with -eoi can not be enabled alongside headers_off -ho")
|
||||||
|
if image is None and dir_in is None:
|
||||||
|
raise ValueError("Either a single image -i or a dir_in -di is required")
|
||||||
eynollah = Eynollah(
|
eynollah = Eynollah(
|
||||||
model,
|
model,
|
||||||
logger=getLogger('Eynollah'),
|
logger=getLogger('eynollah'),
|
||||||
image_filename=image,
|
|
||||||
overwrite=overwrite,
|
|
||||||
dir_out=out,
|
dir_out=out,
|
||||||
dir_in=dir_in,
|
|
||||||
dir_of_cropped_images=save_images,
|
dir_of_cropped_images=save_images,
|
||||||
extract_only_images=extract_only_images,
|
extract_only_images=extract_only_images,
|
||||||
dir_of_layout=save_layout,
|
dir_of_layout=save_layout,
|
||||||
|
@ -301,10 +312,9 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
||||||
skip_layout_and_reading_order=skip_layout_and_reading_order,
|
skip_layout_and_reading_order=skip_layout_and_reading_order,
|
||||||
)
|
)
|
||||||
if dir_in:
|
if dir_in:
|
||||||
eynollah.run()
|
eynollah.run(dir_in=dir_in, overwrite=overwrite)
|
||||||
else:
|
else:
|
||||||
pcgts = eynollah.run()
|
eynollah.run(image_filename=image, overwrite=overwrite)
|
||||||
eynollah.writer.write_pagexml(pcgts)
|
|
||||||
|
|
||||||
|
|
||||||
@main.command()
|
@main.command()
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,7 +2,7 @@ from typing import Optional
|
||||||
from ocrd_models import OcrdPage
|
from ocrd_models import OcrdPage
|
||||||
from ocrd import Processor, OcrdPageResult
|
from ocrd import Processor, OcrdPageResult
|
||||||
|
|
||||||
from .eynollah import Eynollah
|
from .eynollah import Eynollah, EynollahXmlWriter
|
||||||
|
|
||||||
class EynollahProcessor(Processor):
|
class EynollahProcessor(Processor):
|
||||||
# already employs background CPU multiprocessing per page
|
# already employs background CPU multiprocessing per page
|
||||||
|
@ -14,11 +14,28 @@ class EynollahProcessor(Processor):
|
||||||
return 'ocrd-eynollah-segment'
|
return 'ocrd-eynollah-segment'
|
||||||
|
|
||||||
def setup(self) -> None:
|
def setup(self) -> None:
|
||||||
# for caching models
|
|
||||||
self.models = None
|
|
||||||
if self.parameter['textline_light'] and not self.parameter['light_version']:
|
if self.parameter['textline_light'] and not self.parameter['light_version']:
|
||||||
raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection, "
|
raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection, "
|
||||||
"but parameter 'light_version' is not enabled")
|
"but parameter 'light_version' is not enabled")
|
||||||
|
self.eynollah = Eynollah(
|
||||||
|
self.resolve_resource(self.parameter['models']),
|
||||||
|
logger=self.logger,
|
||||||
|
allow_enhancement=self.parameter['allow_enhancement'],
|
||||||
|
curved_line=self.parameter['curved_line'],
|
||||||
|
right2left=self.parameter['right_to_left'],
|
||||||
|
ignore_page_extraction=self.parameter['ignore_page_extraction'],
|
||||||
|
light_version=self.parameter['light_version'],
|
||||||
|
textline_light=self.parameter['textline_light'],
|
||||||
|
full_layout=self.parameter['full_layout'],
|
||||||
|
allow_scaling=self.parameter['allow_scaling'],
|
||||||
|
headers_off=self.parameter['headers_off'],
|
||||||
|
tables=self.parameter['tables'],
|
||||||
|
)
|
||||||
|
self.eynollah.plotter = None
|
||||||
|
|
||||||
|
def shutdown(self):
|
||||||
|
if hasattr(self, 'eynollah'):
|
||||||
|
del self.eynollah
|
||||||
|
|
||||||
def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
|
def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
|
||||||
"""
|
"""
|
||||||
|
@ -60,27 +77,15 @@ class EynollahProcessor(Processor):
|
||||||
image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file
|
image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file
|
||||||
result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original
|
result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original
|
||||||
# FIXME: mask out already existing regions (incremental segmentation)
|
# FIXME: mask out already existing regions (incremental segmentation)
|
||||||
eynollah = Eynollah(
|
self.eynollah.cache_images(
|
||||||
self.resolve_resource(self.parameter['models']),
|
image_pil=page_image,
|
||||||
logger=self.logger,
|
dpi=self.parameter['dpi'],
|
||||||
allow_enhancement=self.parameter['allow_enhancement'],
|
|
||||||
curved_line=self.parameter['curved_line'],
|
|
||||||
right2left=self.parameter['right_to_left'],
|
|
||||||
ignore_page_extraction=self.parameter['ignore_page_extraction'],
|
|
||||||
light_version=self.parameter['light_version'],
|
|
||||||
textline_light=self.parameter['textline_light'],
|
|
||||||
full_layout=self.parameter['full_layout'],
|
|
||||||
allow_scaling=self.parameter['allow_scaling'],
|
|
||||||
headers_off=self.parameter['headers_off'],
|
|
||||||
tables=self.parameter['tables'],
|
|
||||||
override_dpi=self.parameter['dpi'],
|
|
||||||
pcgts=pcgts,
|
|
||||||
image_filename=image_filename,
|
|
||||||
image_pil=page_image
|
|
||||||
)
|
)
|
||||||
if self.models is not None:
|
self.eynollah.writer = EynollahXmlWriter(
|
||||||
# reuse loaded models from previous page
|
dir_out=None,
|
||||||
eynollah.models = self.models
|
image_filename=image_filename,
|
||||||
eynollah.run()
|
curved_line=self.eynollah.curved_line,
|
||||||
self.models = eynollah.models
|
textline_light=self.eynollah.textline_light,
|
||||||
|
pcgts=pcgts)
|
||||||
|
self.eynollah.run_single()
|
||||||
return result
|
return result
|
||||||
|
|
|
@ -4,25 +4,19 @@ Tool to load model and binarize a given image.
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from os import environ, devnull
|
|
||||||
from os.path import join
|
|
||||||
from warnings import catch_warnings, simplefilter
|
|
||||||
import os
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import cv2
|
import cv2
|
||||||
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
from ocrd_utils import tf_disable_interactive_logs
|
||||||
stderr = sys.stderr
|
tf_disable_interactive_logs()
|
||||||
sys.stderr = open(devnull, 'w')
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow.keras.models import load_model
|
from tensorflow.keras.models import load_model
|
||||||
from tensorflow.python.keras import backend as tensorflow_backend
|
from tensorflow.python.keras import backend as tensorflow_backend
|
||||||
sys.stderr = stderr
|
|
||||||
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
def resize_image(img_in, input_height, input_width):
|
def resize_image(img_in, input_height, input_width):
|
||||||
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
|
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
|
||||||
|
|
||||||
|
@ -53,7 +47,7 @@ class SbbBinarizer:
|
||||||
del self.session
|
del self.session
|
||||||
|
|
||||||
def load_model(self, model_name):
|
def load_model(self, model_name):
|
||||||
model = load_model(join(self.model_dir, model_name), compile=False)
|
model = load_model(os.path.join(self.model_dir, model_name), compile=False)
|
||||||
model_height = model.layers[len(model.layers)-1].output_shape[1]
|
model_height = model.layers[len(model.layers)-1].output_shape[1]
|
||||||
model_width = model.layers[len(model.layers)-1].output_shape[2]
|
model_width = model.layers[len(model.layers)-1].output_shape[2]
|
||||||
n_classes = model.layers[len(model.layers)-1].output_shape[3]
|
n_classes = model.layers[len(model.layers)-1].output_shape[3]
|
||||||
|
|
|
@ -247,7 +247,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map):
|
||||||
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
|
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
|
||||||
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
|
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
|
||||||
#cnts = cnts/2
|
#cnts = cnts/2
|
||||||
cnts = [(i/6).astype(np.int) for i in cnts]
|
cnts = [(i/6).astype(int) for i in cnts]
|
||||||
results = map(partial(do_back_rotation_and_get_cnt_back,
|
results = map(partial(do_back_rotation_and_get_cnt_back,
|
||||||
img=img,
|
img=img,
|
||||||
slope_first=slope_first,
|
slope_first=slope_first,
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
from contextlib import nullcontext
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ocrd_models import OcrdExif
|
from ocrd_models import OcrdExif
|
||||||
|
@ -17,12 +18,13 @@ def pil2cv(img):
|
||||||
def check_dpi(img):
|
def check_dpi(img):
|
||||||
try:
|
try:
|
||||||
if isinstance(img, Image.Image):
|
if isinstance(img, Image.Image):
|
||||||
pil_image = img
|
pil_image = nullcontext(img)
|
||||||
elif isinstance(img, str):
|
elif isinstance(img, str):
|
||||||
pil_image = Image.open(img)
|
pil_image = Image.open(img)
|
||||||
else:
|
else:
|
||||||
pil_image = cv2pil(img)
|
pil_image = nullcontext(cv2pil(img))
|
||||||
exif = OcrdExif(pil_image)
|
with pil_image:
|
||||||
|
exif = OcrdExif(pil_image)
|
||||||
resolution = exif.resolution
|
resolution = exif.resolution
|
||||||
if resolution == 1:
|
if resolution == 1:
|
||||||
raise Exception()
|
raise Exception()
|
||||||
|
|
|
@ -1616,7 +1616,7 @@ def do_work_of_slopes_new(
|
||||||
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
|
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
|
||||||
hierarchy,
|
hierarchy,
|
||||||
max_area=1, min_area=0.00008)
|
max_area=1, min_area=0.00008)
|
||||||
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
|
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
|
||||||
if np.isnan(y_diff_mean):
|
if np.isnan(y_diff_mean):
|
||||||
slope_for_all = MAX_SLOPE
|
slope_for_all = MAX_SLOPE
|
||||||
else:
|
else:
|
||||||
|
@ -1681,7 +1681,7 @@ def do_work_of_slopes_new_curved(
|
||||||
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
|
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
|
||||||
hierarchy,
|
hierarchy,
|
||||||
max_area=1, min_area=0.0008)
|
max_area=1, min_area=0.0008)
|
||||||
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
|
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
|
||||||
if np.isnan(y_diff_mean):
|
if np.isnan(y_diff_mean):
|
||||||
slope_for_all = MAX_SLOPE
|
slope_for_all = MAX_SLOPE
|
||||||
else:
|
else:
|
||||||
|
|
BIN
tests/resources/euler_rechenkunst01_1738_0025.tif
Normal file
BIN
tests/resources/euler_rechenkunst01_1738_0025.tif
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue