mirror of
https://github.com/qurator-spk/sbb_textline_detection.git
synced 2025-06-08 19:30:03 +02:00
use make_file_id and assert_file_grp_cardinality
This commit is contained in:
parent
8b01d9e671
commit
05deb03ec8
1 changed file with 11 additions and 9 deletions
|
@@ -9,7 +9,13 @@ from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||||
from ocrd_modelfactory import page_from_file
|
from ocrd_modelfactory import page_from_file
|
||||||
from ocrd_models import OcrdFile
|
from ocrd_models import OcrdFile
|
||||||
from ocrd_models.ocrd_page_generateds import MetadataItemType, LabelsType, LabelType
|
from ocrd_models.ocrd_page_generateds import MetadataItemType, LabelsType, LabelType
|
||||||
from ocrd_utils import concat_padded, getLogger, MIMETYPE_PAGE
|
from ocrd_utils import (
|
||||||
|
assert_file_grp_cardinality,
|
||||||
|
getLogger,
|
||||||
|
make_file_id,
|
||||||
|
MIMETYPE_PAGE
|
||||||
|
)
|
||||||
|
|
||||||
from pkg_resources import resource_string
|
from pkg_resources import resource_string
|
||||||
|
|
||||||
from qurator.sbb_textline_detector import textline_detector
|
from qurator.sbb_textline_detector import textline_detector
|
||||||
|
@@ -27,7 +33,6 @@ def ocrd_sbb_textline_detector(*args, **kwargs):
|
||||||
|
|
||||||
TOOL = 'ocrd-sbb-textline-detector'
|
TOOL = 'ocrd-sbb-textline-detector'
|
||||||
|
|
||||||
|
|
||||||
class OcrdSbbTextlineDetectorRecognize(Processor):
|
class OcrdSbbTextlineDetectorRecognize(Processor):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
@@ -35,12 +40,6 @@ class OcrdSbbTextlineDetectorRecognize(Processor):
|
||||||
kwargs['version'] = OCRD_TOOL['version']
|
kwargs['version'] = OCRD_TOOL['version']
|
||||||
super(OcrdSbbTextlineDetectorRecognize, self).__init__(*args, **kwargs)
|
super(OcrdSbbTextlineDetectorRecognize, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
def _make_file_id(self, input_file, input_file_grp, n):
|
|
||||||
file_id = input_file.ID.replace(input_file_grp, self.output_file_grp)
|
|
||||||
if file_id == input_file.ID:
|
|
||||||
file_id = concat_padded(self.output_file_grp, n)
|
|
||||||
return file_id
|
|
||||||
|
|
||||||
def _resolve_image_file(self, input_file: OcrdFile) -> str:
|
def _resolve_image_file(self, input_file: OcrdFile) -> str:
|
||||||
if input_file.mimetype == MIMETYPE_PAGE:
|
if input_file.mimetype == MIMETYPE_PAGE:
|
||||||
pcgts = page_from_file(self.workspace.download_file(input_file))
|
pcgts = page_from_file(self.workspace.download_file(input_file))
|
||||||
|
@@ -51,11 +50,14 @@ class OcrdSbbTextlineDetectorRecognize(Processor):
|
||||||
return image_file
|
return image_file
|
||||||
|
|
||||||
def process(self):
|
def process(self):
|
||||||
|
assert_file_grp_cardinality(self.input_file_grp, 1)
|
||||||
|
assert_file_grp_cardinality(self.output_file_grp, 1)
|
||||||
|
|
||||||
for (n, input_file) in enumerate(self.input_files):
|
for (n, input_file) in enumerate(self.input_files):
|
||||||
page_id = input_file.pageId or input_file.ID
|
page_id = input_file.pageId or input_file.ID
|
||||||
log.info("INPUT FILE %i / %s", n, input_file)
|
log.info("INPUT FILE %i / %s", n, input_file)
|
||||||
|
|
||||||
file_id = self._make_file_id(input_file, self.input_file_grp, n)
|
file_id = make_file_id(input_file, self.output_file_grp)
|
||||||
|
|
||||||
# Process the files
|
# Process the files
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue