You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
eynollah/qurator/eynollah/processor.py

36 lines
1.4 KiB
Python

from typing import Optional
from ocrd.processor.ocrd_page_result import OcrdPageResult
from ocrd_models import OcrdPage
from ocrd import Processor
from .eynollah import Eynollah
class EynollahProcessor(Processor):
    """OCR-D ``Processor`` subclass that runs ``Eynollah`` on a single page."""

    @property
    def metadata_location(self) -> str:
        """Return the path of the ``ocrd-tool.json`` describing this processor."""
        return 'eynollah/ocrd-tool.json'

    def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
        """Run Eynollah on the first input page and return it wrapped as a result.

        Only ``input_pcgts[0]`` is used. The page's image file is looked up in
        the workspace METS by ``local_filename`` and downloaded, then handed to
        ``Eynollah`` together with the processor parameters.

        Args:
            *input_pcgts: Input PAGE documents; the first one must be present.
            page_id: Identifier of the page being processed; only used here
                for the error message.

        Returns:
            An ``OcrdPageResult`` wrapping the same ``pcgts`` object that was
            passed to ``Eynollah`` (presumably updated in place by
            ``Eynollah.run()`` -- TODO confirm against ``Eynollah``).

        Raises:
            AssertionError: If no input page was supplied or the first one is
                empty.
            FileNotFoundError: If the METS has no file whose ``local_filename``
                equals the page's ``imageFilename``.
        """
        # The asserts also narrow Optional[OcrdPage] to OcrdPage.
        assert input_pcgts
        assert input_pcgts[0]
        pcgts = input_pcgts[0]
        page = pcgts.get_Page()

        # XXX loses DPI information
        # page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')

        # Use a default with next() so that a missing METS entry surfaces as a
        # descriptive error instead of a bare StopIteration.
        image_file = next(self.workspace.mets.find_files(local_filename=page.imageFilename), None)
        if image_file is None:
            raise FileNotFoundError(
                f"No file with local_filename {page.imageFilename!r} found in METS"
                f" (page_id={page_id!r})"
            )
        image_filename = self.workspace.download_file(image_file).local_filename

        Eynollah(
            dir_models=self.resolve_resource(self.parameter['models']),
            allow_enhancement=False,
            curved_line=self.parameter['curved_line'],
            full_layout=self.parameter['full_layout'],
            allow_scaling=self.parameter['allow_scaling'],
            headers_off=self.parameter['headers_off'],
            tables=self.parameter['tables'],
            override_dpi=self.parameter['dpi'],
            logger=self.logger,
            pcgts=pcgts,
            image_filename=image_filename,
        ).run()
        return OcrdPageResult(pcgts)