update OCR-D bindings

2026-03-02 05:11:57 +01:00 · 2025-11-26 16:20:27 +01:00 · 2025-11-26 16:20:27 +01:00 · 9d9d32daed
commit 9d9d32daed
parent 103c007368
4 changed files with 24 additions and 9 deletions
--- a/Makefile
+++ b/Makefile
@@ -102,12 +102,12 @@ ocrd-test: tests/resources/2files/kant_aufklaerung_1784_0020.tif
 	cp $< $(TMPDIR)
 	ocrd workspace -d $(TMPDIR) init
 	ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F)
-	ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/models_eynollah
+	ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)
 	result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \
 	fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \
 	fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result
-	ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/models_eynollah/eynollah-binarization_20210425 
-	ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/models_eynollah/eynollah-binarization_20210425  -P operation_level region
+	ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)
+	ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)  -P operation_level region
 	$(RM) -r $(TMPDIR)

 # Run unit tests
--- a/src/eynollah/ocrd-tool.json
+++ b/src/eynollah/ocrd-tool.json
@@ -82,13 +82,21 @@
        }
      },
      "resources": [
+        {
+          "url": "https://zenodo.org/records/17580627/files/models_all_v0_7_0.zip?download=1",
+          "name": "models_layout_v0_7_0",
+          "type": "archive",
+          "size": 6119874002,
+          "description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement and OCR",
+          "version_range": ">= v0.7.0"
+        },
        {
          "url": "https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1",
          "name": "models_layout_v0_6_0",
          "type": "archive",
          "path_in_archive": "models_layout_v0_6_0",
          "size": 3525684179,
-          "description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement",
+          "description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement and OCR",
          "version_range": ">= v0.5.0"
        },
        {
--- a/src/eynollah/ocrd_cli_binarization.py
+++ b/src/eynollah/ocrd_cli_binarization.py
@@ -1,6 +1,8 @@
+from functools import cached_property
 from typing import Optional

 from PIL import Image
+from frozendict import frozendict
 import numpy as np
 import cv2
 from click import command
@@ -9,6 +11,8 @@ from ocrd import Processor, OcrdPageResult, OcrdPageResultImage
 from ocrd_models.ocrd_page import OcrdPage, AlternativeImageType
 from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor

+from eynollah.model_zoo.model_zoo import EynollahModelZoo
+
 from .sbb_binarize import SbbBinarizer


@@ -25,7 +29,7 @@ class SbbBinarizeProcessor(Processor):
    # already employs GPU (without singleton process atm)
    max_workers = 1

-    @property
+    @cached_property
    def executable(self):
        return 'ocrd-sbb-binarize'

@@ -34,9 +38,9 @@ class SbbBinarizeProcessor(Processor):
        Set up the model prior to processing.
        """
        # resolve relative path via OCR-D ResourceManager
-        assert isinstance(self.parameter, dict)
-        model_path = self.resolve_resource(self.parameter['model'])
-        self.binarizer = SbbBinarizer(model_dir=model_path, logger=self.logger)
+        assert isinstance(self.parameter, frozendict)
+        model_zoo = EynollahModelZoo(basedir=self.parameter['model'])
+        self.binarizer = SbbBinarizer(model_zoo=model_zoo, mode='single', logger=self.logger)

    def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
        """
--- a/src/eynollah/processor.py
+++ b/src/eynollah/processor.py
@@ -3,6 +3,8 @@ from typing import Optional
 from ocrd_models import OcrdPage
 from ocrd import OcrdPageResultImage, Processor, OcrdPageResult

+from eynollah.model_zoo.model_zoo import EynollahModelZoo
+
 from .eynollah import Eynollah, EynollahXmlWriter

 class EynollahProcessor(Processor):
@@ -19,8 +21,9 @@ class EynollahProcessor(Processor):
        if self.parameter['textline_light'] != self.parameter['light_version']:
            raise ValueError("Error: You must set or unset both parameter 'textline_light' (to enable light textline detection), "
                             "and parameter 'light_version' (faster+simpler method for main region detection and deskewing)")
+        model_zoo = EynollahModelZoo(basedir=self.parameter['models'])
        self.eynollah = Eynollah(
-            self.resolve_resource(self.parameter['models']),
+            model_zoo=model_zoo,
            allow_enhancement=self.parameter['allow_enhancement'],
            curved_line=self.parameter['curved_line'],
            right2left=self.parameter['right_to_left'],