mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-12-01 08:44:13 +01:00
WIP: reorganize OCR-D and start on ocrd-eynollah-ocr
This commit is contained in:
parent
0f410c2e7c
commit
acb91efe48
7 changed files with 44 additions and 31 deletions
|
|
@@ -41,8 +41,9 @@ classifiers = [
|
|||
[project.scripts]
|
||||
eynollah = "eynollah.cli:main"
|
||||
eynollah-training = "eynollah.training.cli:main"
|
||||
ocrd-eynollah-segment = "eynollah.ocrd_cli:main"
|
||||
ocrd-sbb-binarize = "eynollah.ocrd_cli_binarization:main"
|
||||
ocrd-eynollah-segment = "eynollah.ocrd.ocrd_cli_layout:main"
|
||||
ocrd-sbb-binarize = "eynollah.ocrd.ocrd_cli_binarize:main"
|
||||
ocrd-eynollah-ocr = "eynollah.ocrd.ocrd_cli_ocr:main"
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/qurator-spk/eynollah"
|
||||
|
|
|
|||
0
src/eynollah/ocrd/__init__.py
Normal file
0
src/eynollah/ocrd/__init__.py
Normal file
9
src/eynollah/ocrd/ocrd_cli_binarize.py
Normal file
9
src/eynollah/ocrd/ocrd_cli_binarize.py
Normal file
|
|
@@ -0,0 +1,9 @@
|
|||
from click import command
|
||||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||
|
||||
from .processor_binarize import SbbBinarizeProcessor
|
||||
|
||||
# Command-line entry point; pyproject maps the ``ocrd-sbb-binarize`` script
# to this function. Kept as a ``#`` comment (not a docstring) so the
# function's ``__doc__`` stays unset.
@command()
@ocrd_cli_options
def main(*args, **kwargs):
    # Hand the processor class and every CLI argument to the OCR-D wrapper.
    processor_class = SbbBinarizeProcessor
    return ocrd_cli_wrap_processor(processor_class, *args, **kwargs)
|
||||
|
|
@@ -1,11 +1,9 @@
|
|||
from .processor import EynollahProcessor
|
||||
from click import command
|
||||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||
|
||||
from .processor_layout import EynollahProcessor
|
||||
|
||||
# Command-line entry point that runs EynollahProcessor through the OCR-D
# CLI wrapper. Documented with ``#`` comments so ``main.__doc__`` stays unset.
@command()
@ocrd_cli_options
def main(*args, **kwargs):
    # Forward all CLI arguments unchanged and pass the wrapper's result back.
    outcome = ocrd_cli_wrap_processor(EynollahProcessor, *args, **kwargs)
    return outcome


if __name__ == '__main__':
    main()
|
||||
|
|
@@ -1,29 +1,15 @@
|
|||
from functools import cached_property
|
||||
from typing import Optional
|
||||
|
||||
from PIL import Image
|
||||
from frozendict import frozendict
|
||||
import numpy as np
|
||||
import cv2
|
||||
from click import command
|
||||
|
||||
from ocrd import Processor, OcrdPageResult, OcrdPageResultImage
|
||||
from ocrd_models.ocrd_page import OcrdPage, AlternativeImageType
|
||||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||
|
||||
from eynollah.model_zoo.model_zoo import EynollahModelZoo
|
||||
|
||||
from .sbb_binarize import SbbBinarizer
|
||||
|
||||
|
||||
def cv2pil(img):
    """Return a PIL image built from *img* after casting its values to uint8."""
    as_uint8 = img.astype('uint8')
    return Image.fromarray(as_uint8)
|
||||
|
||||
def pil2cv(img):
    """Convert a PIL image into a BGR array via ``cv2.cvtColor``.

    Modes ``'1'`` and ``'L'`` are converted with ``COLOR_GRAY2BGR``; every
    other mode is converted with ``COLOR_RGB2BGR``.
    """
    # from ocrd/workspace.py
    if img.mode in ('1', 'L'):
        conversion_code = cv2.COLOR_GRAY2BGR
    else:
        conversion_code = cv2.COLOR_RGB2BGR

    pixels = np.array(img)
    if img.mode == '1':
        # mode '1' arrays are cast to uint8 before the colour conversion
        pixels = pixels.astype('uint8')

    return cv2.cvtColor(pixels, conversion_code)
|
||||
from ..sbb_binarize import SbbBinarizer
|
||||
from ..utils.pil_cv2 import cv2pil, pil2cv
|
||||
|
||||
class SbbBinarizeProcessor(Processor):
|
||||
# already employs GPU (without singleton process atm)
|
||||
|
|
@@ -103,12 +89,7 @@ class SbbBinarizeProcessor(Processor):
|
|||
line_image_bin = cv2pil(self.binarizer.run(image=pil2cv(line_image), use_patches=True))
|
||||
# update PAGE (reference the image file):
|
||||
line_image_ref = AlternativeImageType(comments=line_xywh['features'] + ',binarized')
|
||||
line.add_AlternativeImage(region_image_ref)
|
||||
line.add_AlternativeImage(line_image_ref)
|
||||
result.images.append(OcrdPageResultImage(line_image_bin, line.id + '.IMG-BIN', line_image_ref))
|
||||
|
||||
return result
|
||||
|
||||
# CLI entry point that runs SbbBinarizeProcessor through the OCR-D CLI
# wrapper. Documented with ``#`` comments so ``main.__doc__`` stays unset.
@command()
@ocrd_cli_options
def main(*args, **kwargs):
    # All positional and keyword arguments are forwarded untouched.
    result = ocrd_cli_wrap_processor(SbbBinarizeProcessor, *args, **kwargs)
    return result
|
||||
|
|
@@ -3,9 +3,9 @@ from typing import Optional
|
|||
from ocrd_models import OcrdPage
|
||||
from ocrd import OcrdPageResultImage, Processor, OcrdPageResult
|
||||
|
||||
from eynollah.model_zoo.model_zoo import EynollahModelZoo
|
||||
|
||||
from .eynollah import Eynollah, EynollahXmlWriter
|
||||
from ..model_zoo.model_zoo import EynollahModelZoo
|
||||
from ..eynollah import Eynollah
|
||||
from ..writer import EynollahXmlWriter
|
||||
|
||||
class EynollahProcessor(Processor):
|
||||
# already employs background CPU multiprocessing per page
|
||||
24
src/eynollah/ocrd/processor_ocr.py
Normal file
24
src/eynollah/ocrd/processor_ocr.py
Normal file
|
|
@@ -0,0 +1,24 @@
|
|||
|
||||
from functools import cached_property
|
||||
from frozendict import frozendict
|
||||
from ocrd import Processor
|
||||
|
||||
from ..model_zoo.model_zoo import EynollahModelZoo
|
||||
|
||||
|
||||
# Work-in-progress OCR-D processor for ``ocrd-eynollah-ocr``: ``setup`` is not
# implemented yet and always ends by raising NotImplementedError.
# (Class-level notes are ``#`` comments so the class ``__doc__`` stays unset.)
class EynollahOcrProcessor(Processor):
    # already employs GPU (without singleton process atm)
    max_workers = 1

    @cached_property
    def executable(self):
        """Name of the command-line executable for this processor."""
        return 'ocrd-eynollah-ocr'

    def setup(self):
        """
        Prepare the model before processing starts.

        Currently a stub: it builds the model zoo from the ``model``
        parameter and then raises NotImplementedError.
        """
        # resolve relative path via OCR-D ResourceManager
        assert isinstance(self.parameter, frozendict)
        basedir = self.parameter['model']
        zoo = EynollahModelZoo(basedir=basedir)
        raise NotImplementedError()
|
||||
Loading…
Add table
Add a link
Reference in a new issue