Merge pull request #1 from bertsky/v3-api-refactor-init

refactoring of Eynollah init and model loading
pull/148/head
Robert Sachunsky 1 week ago committed by GitHub
commit 1a0b9d1958
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -77,9 +77,14 @@ deps-test: models_eynollah
smoke-test: TMPDIR != mktemp -d
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
# layout analysis:
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
# directory mode (skip one, add one):
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
# binarize:
eynollah binarization -m $(CURDIR)/default-2021-03-09 $< $(TMPDIR)/$(<F)
test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"

@ -83,23 +83,28 @@ If no option is set, the tool performs layout detection of main regions (backgro
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.
#### Use as OCR-D processor
🚧 **Work in progress**
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor.
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli).
In this case, the source image file group with (preferably) RGB images should be used as input like this:
```
ocrd-eynollah-segment -I OCR-D-IMG -O SEG-LINE -P models
```
Any image referenced by `@imageFilename` in PAGE-XML is passed on directly to Eynollah as a processor, so that e.g.
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models 2022-04-05
```
ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
```
uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps.
If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
- existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
- existing annotation (and respective `AlternativeImage`s) are partially _ignored_:
- previous page frame detection (`cropped` images)
- previous derotation (`deskewed` images)
- previous thresholding (`binarized` images)
- if the page-level image nevertheless deviates from the original (`@imageFilename`)
(because some other preprocessing step was in effect like `denoised`), then
the output PAGE-XML will be based on that as new top-level (`@imageFilename`)
ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models 2022-04-05
Still, in general, it makes more sense to add other workflow steps **after** Eynollah.
#### Additional documentation
Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki).

@ -256,26 +256,37 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
if log_level:
getLogger('eynollah').setLevel(getLevelName(log_level))
if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep")
sys.exit(1)
raise ValueError("Plotting with -sl, -sd, -sa, -sp, -si or -ae also requires -ep")
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -sp, -si or -ae")
sys.exit(1)
raise ValueError("Plotting with -ep also requires -sl, -sd, -sa, -sp, -si or -ae")
if textline_light and not light_version:
print('Error: You used -tll to enable light textline detection but -light is not enabled')
sys.exit(1)
raise ValueError("Light textline detection with -tll also requires -light")
if light_version and not textline_light:
print('Error: You used -light without -tll. Light version need light textline to be enabled.')
if extract_only_images and (allow_enhancement or allow_scaling or light_version or curved_line or textline_light or full_layout or tables or right2left or headers_off) :
print('Error: You used -eoi which can not be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae or curved_line -cl or textline_light -tll or full_layout -fl or tables -tab or right2left -r2l or headers_off -ho')
sys.exit(1)
raise ValueError("Light version with -light also requires light textline detection -tll")
if extract_only_images and allow_enhancement:
raise ValueError("Image extraction with -eoi can not be enabled alongside allow_enhancement -ae")
if extract_only_images and allow_scaling:
raise ValueError("Image extraction with -eoi can not be enabled alongside allow_scaling -as")
if extract_only_images and light_version:
raise ValueError("Image extraction with -eoi can not be enabled alongside light_version -light")
if extract_only_images and curved_line:
raise ValueError("Image extraction with -eoi can not be enabled alongside curved_line -cl")
if extract_only_images and textline_light:
raise ValueError("Image extraction with -eoi can not be enabled alongside textline_light -tll")
if extract_only_images and full_layout:
raise ValueError("Image extraction with -eoi can not be enabled alongside full_layout -fl")
if extract_only_images and tables:
raise ValueError("Image extraction with -eoi can not be enabled alongside tables -tab")
if extract_only_images and right2left:
raise ValueError("Image extraction with -eoi can not be enabled alongside right2left -r2l")
if extract_only_images and headers_off:
raise ValueError("Image extraction with -eoi can not be enabled alongside headers_off -ho")
if image is None and dir_in is None:
raise ValueError("Either a single image -i or a dir_in -di is required")
eynollah = Eynollah(
model,
logger=getLogger('Eynollah'),
image_filename=image,
overwrite=overwrite,
logger=getLogger('eynollah'),
dir_out=out,
dir_in=dir_in,
dir_of_cropped_images=save_images,
extract_only_images=extract_only_images,
dir_of_layout=save_layout,
@ -301,10 +312,9 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
skip_layout_and_reading_order=skip_layout_and_reading_order,
)
if dir_in:
eynollah.run()
eynollah.run(dir_in=dir_in, overwrite=overwrite)
else:
pcgts = eynollah.run()
eynollah.writer.write_pagexml(pcgts)
eynollah.run(image_filename=image, overwrite=overwrite)
@main.command()

File diff suppressed because it is too large Load Diff

@ -2,7 +2,7 @@ from typing import Optional
from ocrd_models import OcrdPage
from ocrd import Processor, OcrdPageResult
from .eynollah import Eynollah
from .eynollah import Eynollah, EynollahXmlWriter
class EynollahProcessor(Processor):
# already employs background CPU multiprocessing per page
@ -14,11 +14,28 @@ class EynollahProcessor(Processor):
return 'ocrd-eynollah-segment'
def setup(self) -> None:
# for caching models
self.models = None
if self.parameter['textline_light'] and not self.parameter['light_version']:
raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection, "
"but parameter 'light_version' is not enabled")
self.eynollah = Eynollah(
self.resolve_resource(self.parameter['models']),
logger=self.logger,
allow_enhancement=self.parameter['allow_enhancement'],
curved_line=self.parameter['curved_line'],
right2left=self.parameter['right_to_left'],
ignore_page_extraction=self.parameter['ignore_page_extraction'],
light_version=self.parameter['light_version'],
textline_light=self.parameter['textline_light'],
full_layout=self.parameter['full_layout'],
allow_scaling=self.parameter['allow_scaling'],
headers_off=self.parameter['headers_off'],
tables=self.parameter['tables'],
)
self.eynollah.plotter = None
def shutdown(self):
if hasattr(self, 'eynollah'):
del self.eynollah
def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
"""
@ -60,27 +77,15 @@ class EynollahProcessor(Processor):
image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file
result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original
# FIXME: mask out already existing regions (incremental segmentation)
eynollah = Eynollah(
self.resolve_resource(self.parameter['models']),
logger=self.logger,
allow_enhancement=self.parameter['allow_enhancement'],
curved_line=self.parameter['curved_line'],
right2left=self.parameter['right_to_left'],
ignore_page_extraction=self.parameter['ignore_page_extraction'],
light_version=self.parameter['light_version'],
textline_light=self.parameter['textline_light'],
full_layout=self.parameter['full_layout'],
allow_scaling=self.parameter['allow_scaling'],
headers_off=self.parameter['headers_off'],
tables=self.parameter['tables'],
override_dpi=self.parameter['dpi'],
pcgts=pcgts,
image_filename=image_filename,
image_pil=page_image
self.eynollah.cache_images(
image_pil=page_image,
dpi=self.parameter['dpi'],
)
if self.models is not None:
# reuse loaded models from previous page
eynollah.models = self.models
eynollah.run()
self.models = eynollah.models
self.eynollah.writer = EynollahXmlWriter(
dir_out=None,
image_filename=image_filename,
curved_line=self.eynollah.curved_line,
textline_light=self.eynollah.textline_light,
pcgts=pcgts)
self.eynollah.run_single()
return result

@ -4,24 +4,18 @@ Tool to load model and binarize a given image.
import sys
from glob import glob
from os import environ, devnull
from os.path import join
from warnings import catch_warnings, simplefilter
import os
import logging
import numpy as np
from PIL import Image
import cv2
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
stderr = sys.stderr
sys.stderr = open(devnull, 'w')
from ocrd_utils import tf_disable_interactive_logs
tf_disable_interactive_logs()
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.python.keras import backend as tensorflow_backend
sys.stderr = stderr
import logging
def resize_image(img_in, input_height, input_width):
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
@ -53,7 +47,7 @@ class SbbBinarizer:
del self.session
def load_model(self, model_name):
model = load_model(join(self.model_dir, model_name), compile=False)
model = load_model(os.path.join(self.model_dir, model_name), compile=False)
model_height = model.layers[len(model.layers)-1].output_shape[1]
model_width = model.layers[len(model.layers)-1].output_shape[2]
n_classes = model.layers[len(model.layers)-1].output_shape[3]

@ -247,7 +247,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map):
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
#cnts = cnts/2
cnts = [(i/6).astype(np.int) for i in cnts]
cnts = [(i/6).astype(int) for i in cnts]
results = map(partial(do_back_rotation_and_get_cnt_back,
img=img,
slope_first=slope_first,

@ -1,3 +1,4 @@
from contextlib import nullcontext
from PIL import Image
import numpy as np
from ocrd_models import OcrdExif
@ -17,12 +18,13 @@ def pil2cv(img):
def check_dpi(img):
try:
if isinstance(img, Image.Image):
pil_image = img
pil_image = nullcontext(img)
elif isinstance(img, str):
pil_image = Image.open(img)
else:
pil_image = cv2pil(img)
exif = OcrdExif(pil_image)
pil_image = nullcontext(cv2pil(img))
with pil_image:
exif = OcrdExif(pil_image)
resolution = exif.resolution
if resolution == 1:
raise Exception()

@ -1616,7 +1616,7 @@ def do_work_of_slopes_new(
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
hierarchy,
max_area=1, min_area=0.00008)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
if np.isnan(y_diff_mean):
slope_for_all = MAX_SLOPE
else:
@ -1681,7 +1681,7 @@ def do_work_of_slopes_new_curved(
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
hierarchy,
max_area=1, min_area=0.0008)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
if np.isnan(y_diff_mean):
slope_for_all = MAX_SLOPE
else:

Loading…
Cancel
Save