mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-04-30 19:22:03 +02:00
wp
This commit is contained in:
parent
957dc66e7c
commit
a9e12a63da
3 changed files with 129 additions and 0 deletions
|
|
@ -42,6 +42,7 @@ classifiers = [
|
|||
eynollah = "eynollah.cli:main"
|
||||
eynollah-training = "eynollah.training.cli:main"
|
||||
ocrd-eynollah-segment = "eynollah.ocrd_cli_segment:main"
|
||||
ocrd-eynollah-recognize = "eynollah.ocrd_cli_recognize:main"
|
||||
ocrd-sbb-binarize = "eynollah.ocrd_cli_binarization:main"
|
||||
|
||||
[project.urls]
|
||||
|
|
|
|||
|
|
@ -163,5 +163,42 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"ocrd-eynollah-recognize": {
|
||||
"executable": "ocrd-eynollah-recognize",
|
||||
"categories": ["Text recognition and optimization"],
|
||||
"steps": ["recognition/text-recognition"],
|
||||
"input_file_grp_cardinality": 1,
|
||||
"output_file_grp_cardinality": 1,
|
||||
"parameters": {
|
||||
"models": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"content-type": "text/directory",
|
||||
"cacheable": true,
|
||||
"description": "Directory containing the eynollah_models directory",
|
||||
"required": true
|
||||
},
|
||||
"do_not_mask_with_textline_contour": {
|
||||
"type": "boolean",
|
||||
"description": "If this parameter is set to true, cropped textline images will not be masked with the textline contour.",
|
||||
"default": false
|
||||
},
|
||||
"tr_ocr": {
|
||||
"type": "boolean",
|
||||
"description": "Whether to use (much more resource-intensive) transformer model",
|
||||
"default": false
|
||||
}
|
||||
},
|
||||
"resources": [
|
||||
{
|
||||
"url": "https://zenodo.org/records/17580627/files/models_ocr_v0_6_0.tar.gz?download=1",
|
||||
"name": "models_ocr_v0_6_0",
|
||||
"type": "archive",
|
||||
"size": 6119874002,
|
||||
"description": "Models for OCR",
|
||||
"version_range": ">= v0.6.0"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
91
src/eynollah/ocrd_cli_recognize.py
Normal file
91
src/eynollah/ocrd_cli_recognize.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from ocrd.workspace import page_from_file
|
||||
from ocrd_models import OcrdFileType, OcrdPage
|
||||
|
||||
from ocrd import Processor
|
||||
from ocrd_utils import (
|
||||
make_file_id,
|
||||
)
|
||||
|
||||
from eynollah.eynollah_ocr import Eynollah_ocr
|
||||
from eynollah.model_zoo.model_zoo import EynollahModelZoo
|
||||
from eynollah.utils.pil_cv2 import pil2cv
|
||||
from eynollah.utils.xml import etree_namespace_for_element_tag
|
||||
|
||||
|
||||
class EynollahRecognizeProcessor(Processor):
    """OCR-D processor wrapping :class:`Eynollah_ocr` for text recognition on PAGE files."""

    @cached_property
    def executable(self):
        """Name of the CLI executable this processor corresponds to."""
        return 'ocrd-eynollah-recognize'

    def setup(self):
        """
        Load model, set predict function.

        Instantiates the Eynollah OCR engine from the processor parameters
        (``models``, ``tr_ocr``, ``do_not_mask_with_textline_contour``,
        ``batch_size``, ``min_conf_value_of_textline_text``).
        """
        assert self.parameter
        model_zoo = EynollahModelZoo(basedir=self.parameter['models'])
        self.eynollah_ocr = Eynollah_ocr(
            model_zoo=model_zoo,
            tr_ocr=self.parameter['tr_ocr'],
            do_not_mask_with_textline_contour=self.parameter['do_not_mask_with_textline_contour'],
            batch_size=self.parameter['batch_size'],
            min_conf_value_of_textline_text=self.parameter['min_conf_value_of_textline_text'])

    # FIXME: This is just a proof-of-concept, very inefficient and non-conformant
    # TODO: OCR writing should use PAGE API once result dataclass mechanism is settled,
    # then simplify/port to proces_page_pcgts
    def process_page_file(self, *input_files: Optional[OcrdFileType]) -> None:
        """
        Run OCR on a single PAGE input file and add the resulting PAGE-XML
        to the workspace's output file group.

        Uses the transformer OCR path when ``tr_ocr`` was enabled in
        :meth:`setup`, otherwise the CNN path (which additionally needs a
        binarized page image).
        """
        assert self.workspace
        page_file = input_files[0]
        assert page_file
        page = page_from_file(page_file)
        assert page
        page_image, page_coords, _ = self.workspace.image_from_page(
            page, page_file.pageId,
            feature_selector="")
        # Namespace of the PAGE-XML root element, needed by the OCR engine
        # for element lookup/serialization.
        page_ns = etree_namespace_for_element_tag(page.etree.getroot().tag)

        img = pil2cv(page_image)
        if self.eynollah_ocr.tr_ocr:
            result = self.eynollah_ocr.run_trocr(
                img=img,
                page_tree=page.etree,
                page_ns=page_ns,
                # presumably the TrOCR model expects square 384x384 crops — TODO confirm
                tr_ocr_input_height_and_width=384,
            )
        else:
            # The CNN path additionally consumes a binarized page image.
            page_image_bin, _, _ = self.workspace.image_from_page(
                page, page_file.pageId,
                feature_selector="binarized")
            result = self.eynollah_ocr.run_cnn(
                img=img,
                page_tree=page.etree,
                page_ns=page_ns,
                img_bin=pil2cv(page_image_bin),
                image_width=512,
                image_height=32,
            )
        output_file_id = make_file_id(page_file, self.output_file_grp)
        output_filename = Path(self.output_file_grp, output_file_id + '.xml')
        # BUG FIX: bare mkdir() raised FileExistsError from the second page
        # onward (the file group directory is shared across pages);
        # parents=True additionally tolerates nested file group paths.
        output_filename.parent.mkdir(parents=True, exist_ok=True)
        self.eynollah_ocr.write_ocr(
            result=result,
            img=img,
            page_tree=page.etree,
            page_ns=page_ns,
            out_file_ocr=str(output_filename),
            out_image_with_text=None,
        )
        self.workspace.add_file(
            file_id=output_file_id,
            file_grp=self.output_file_grp,
            page_id=page_file.pageId,
            local_filename=output_filename,
            # NOTE(review): the PAGE namespace URI is passed as the MIME type
            # here — confirm this matches OCR-D's MIMETYPE_PAGE convention.
            mimetype=page_ns,
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue