From 05deb03ec8b8acde37d6f66aac84b25ecea16e3b Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 11 Aug 2020 15:06:34 +0200 Subject: [PATCH] use make_file_id and assert_file_grp_cardinality --- qurator/sbb_textline_detector/ocrd_cli.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py index ea1b4e3..26faf45 100644 --- a/qurator/sbb_textline_detector/ocrd_cli.py +++ b/qurator/sbb_textline_detector/ocrd_cli.py @@ -9,7 +9,13 @@ from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor from ocrd_modelfactory import page_from_file from ocrd_models import OcrdFile from ocrd_models.ocrd_page_generateds import MetadataItemType, LabelsType, LabelType -from ocrd_utils import concat_padded, getLogger, MIMETYPE_PAGE +from ocrd_utils import ( + assert_file_grp_cardinality, + getLogger, + make_file_id, + MIMETYPE_PAGE +) + from pkg_resources import resource_string from qurator.sbb_textline_detector import textline_detector @@ -27,7 +33,6 @@ def ocrd_sbb_textline_detector(*args, **kwargs): TOOL = 'ocrd-sbb-textline-detector' - class OcrdSbbTextlineDetectorRecognize(Processor): def __init__(self, *args, **kwargs): @@ -35,12 +40,6 @@ class OcrdSbbTextlineDetectorRecognize(Processor): kwargs['version'] = OCRD_TOOL['version'] super(OcrdSbbTextlineDetectorRecognize, self).__init__(*args, **kwargs) - def _make_file_id(self, input_file, input_file_grp, n): - file_id = input_file.ID.replace(input_file_grp, self.output_file_grp) - if file_id == input_file.ID: - file_id = concat_padded(self.output_file_grp, n) - return file_id - def _resolve_image_file(self, input_file: OcrdFile) -> str: if input_file.mimetype == MIMETYPE_PAGE: pcgts = page_from_file(self.workspace.download_file(input_file)) @@ -51,11 +50,14 @@ class OcrdSbbTextlineDetectorRecognize(Processor): return image_file def process(self): + assert_file_grp_cardinality(self.input_file_grp, 1) + assert_file_grp_cardinality(self.output_file_grp, 1) + for (n, input_file) in enumerate(self.input_files): page_id = input_file.pageId or input_file.ID log.info("INPUT FILE %i / %s", n, input_file) - file_id = self._make_file_id(input_file, self.input_file_grp, n) + file_id = make_file_id(input_file, self.output_file_grp) # Process the files try: