wp

2026-06-22 21:09:14 +02:00 · 2026-04-28 12:18:29 +02:00 · 2026-04-28 12:18:29 +02:00 · a9e12a63da
commit a9e12a63da
parent 957dc66e7c
3 changed files with 129 additions and 0 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -42,6 +42,7 @@ classifiers = [
 eynollah = "eynollah.cli:main"
 eynollah-training = "eynollah.training.cli:main"
 ocrd-eynollah-segment = "eynollah.ocrd_cli_segment:main"
 ocrd-eynollah-recognize = "eynollah.ocrd_cli_recognize:main"
 ocrd-sbb-binarize = "eynollah.ocrd_cli_binarization:main"
 [project.urls]
--- a/src/eynollah/ocrd-tool.json
+++ b/src/eynollah/ocrd-tool.json
@ -163,5 +163,42 @@
        }
      ]
    }
  },
  "ocrd-eynollah-recognize": {
    "executable": "ocrd-eynollah-recognize",
    "categories": ["Text recognition and optimization"],
    "steps": ["recognition/text-recognition"],
    "input_file_grp_cardinality": 1,
    "output_file_grp_cardinality": 1,
    "parameters": {
      "models": {
        "type": "string",
        "format": "uri",
        "content-type": "text/directory",
        "cacheable": true,
        "description": "Directory containing the eynollah_models directory",
        "required": true
      },
      "do_not_mask_with_textline_contour": {
        "type": "boolean",
        "description": "If this parameter is set to true, cropped textline images will not be masked with the textline contour.",
        "default": false
      },
      "tr_ocr": {
        "type": "boolean",
        "description": "Whether to use (much more resource-intensive) transformer model",
        "default": false
      }
    },
    "resources": [
      {
        "url": "https://zenodo.org/records/17580627/files/models_ocr_v0_6_0.tar.gz?download=1",
        "name": "models_ocr_v0_6_0",
        "type": "archive",
        "size": 6119874002,
        "description": "Models for OCR",
        "version_range": ">= v0.6.0"
      }
    ]
  }
 }
--- a/src/eynollah/ocrd_cli_recognize.py
+++ b/src/eynollah/ocrd_cli_recognize.py
@ -0,0 +1,91 @@
 from functools import cached_property
 from pathlib import Path
 from typing import Optional
 from ocrd.workspace import page_from_file
 from ocrd_models import OcrdFileType, OcrdPage
 from ocrd import Processor
 from ocrd_utils import (
    MIMETYPE_PAGE,
    make_file_id,
 )
 from eynollah.eynollah_ocr import Eynollah_ocr
 from eynollah.model_zoo.model_zoo import EynollahModelZoo
 from eynollah.utils.pil_cv2 import pil2cv
 from eynollah.utils.xml import etree_namespace_for_element_tag
 class EynollahRecognizeProcessor(Processor):
     """OCR-D processor wrapping ``Eynollah_ocr`` for text recognition on PAGE files."""

     @cached_property
     def executable(self):
         """Name of the executable this processor is registered under."""
         return 'ocrd-eynollah-recognize'

     def setup(self):
         """
         Load model, set predict function

         Builds the model zoo from the ``models`` parameter and instantiates
         ``Eynollah_ocr`` as ``self.eynollah_ocr``.
         """
         params = self.parameter
         assert params
         model_zoo = EynollahModelZoo(basedir=params['models'])
         ocr_kwargs = {
             'model_zoo': model_zoo,
             'tr_ocr': params['tr_ocr'],
             'do_not_mask_with_textline_contour': params['do_not_mask_with_textline_contour'],
         }
         # These two options are not declared in this tool's parameter schema
         # (ocrd-tool.json), so they may be absent from the parameter mapping.
         # Forward them only when present instead of failing with KeyError;
         # otherwise Eynollah_ocr's own defaults apply.
         for optional_name in ('batch_size', 'min_conf_value_of_textline_text'):
             if optional_name in params:
                 ocr_kwargs[optional_name] = params[optional_name]
         self.eynollah_ocr = Eynollah_ocr(**ocr_kwargs)

     # FIXME: This is just a proof-of-concept, very inefficient and non-conformant
     # TODO: OCR writing should use PAGE API once result dataclass mechanism is settled,
     #       then simplify/port to process_page_pcgts
     def process_page_file(self, *input_files: Optional[OcrdFileType]) -> None:
         """
         Recognize text on a single page and add the result to the workspace.

         Only the first entry of ``input_files`` is used. The page is parsed,
         its image is retrieved from the workspace and passed to either
         ``run_trocr`` (when ``tr_ocr`` is enabled) or ``run_cnn`` (which
         additionally gets a binarized version of the page image). The result
         is written by ``write_ocr`` into the output file group directory and
         then registered in the workspace.
         """
         assert self.workspace
         page_file = input_files[0]
         assert page_file
         page = page_from_file(page_file)
         assert page
         page_image, page_coords, _ = self.workspace.image_from_page(
             page, page_file.pageId,
             feature_selector="")
         page_ns = etree_namespace_for_element_tag(page.etree.getroot().tag)
         img = pil2cv(page_image)
         if self.eynollah_ocr.tr_ocr:
             result = self.eynollah_ocr.run_trocr(
                 img=img,
                 page_tree=page.etree,
                 page_ns=page_ns,
                 tr_ocr_input_height_and_width=384,
             )
         else:
             # the CNN path additionally consumes a binarized page image
             page_image_bin, _, _ = self.workspace.image_from_page(
                 page, page_file.pageId,
                 feature_selector="binarized")
             result = self.eynollah_ocr.run_cnn(
                 img=img,
                 page_tree=page.etree,
                 page_ns=page_ns,
                 img_bin=pil2cv(page_image_bin),
                 image_width=512,
                 image_height=32,
             )
         output_file_id = make_file_id(page_file, self.output_file_grp)
         output_filename = Path(self.output_file_grp, output_file_id + '.xml')
         # The output directory is shared by all pages: it already exists from
         # the second page onward, so creating it must be idempotent.
         output_filename.parent.mkdir(parents=True, exist_ok=True)
         self.eynollah_ocr.write_ocr(
             result=result,
             img=img,
             page_tree=page.etree,
             page_ns=page_ns,
             out_file_ocr=str(output_filename),
             out_image_with_text=None,
         )
         self.workspace.add_file(
             file_id=output_file_id,
             file_grp=self.output_file_grp,
             page_id=page_file.pageId,
             local_filename=output_filename,
             # register the PAGE-XML media type (page_ns is passed to the OCR
             # calls as the XML namespace, not a MIME type)
             mimetype=MIMETYPE_PAGE,
         )