mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-15 15:19:55 +02:00
OCR-D: init Eynollah in 'setup', re-use instance for each page via non-public API
This commit is contained in:
parent
ffeb4a343d
commit
dd51f900b9
1 changed file with 33 additions and 24 deletions
|
@@ -2,7 +2,7 @@ from typing import Optional
|
||||||
from ocrd_models import OcrdPage
|
from ocrd_models import OcrdPage
|
||||||
from ocrd import Processor, OcrdPageResult
|
from ocrd import Processor, OcrdPageResult
|
||||||
|
|
||||||
from .eynollah import Eynollah
|
from .eynollah import Eynollah, EynollahXmlWriter
|
||||||
|
|
||||||
class EynollahProcessor(Processor):
|
class EynollahProcessor(Processor):
|
||||||
# already employs background CPU multiprocessing per page
|
# already employs background CPU multiprocessing per page
|
||||||
|
@@ -14,11 +14,32 @@ class EynollahProcessor(Processor):
|
||||||
return 'ocrd-eynollah-segment'
|
return 'ocrd-eynollah-segment'
|
||||||
|
|
||||||
def setup(self) -> None:
|
def setup(self) -> None:
|
||||||
# for caching models
|
|
||||||
self.models = None
|
|
||||||
if self.parameter['textline_light'] and not self.parameter['light_version']:
|
if self.parameter['textline_light'] and not self.parameter['light_version']:
|
||||||
raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection, "
|
raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection, "
|
||||||
"but parameter 'light_version' is not enabled")
|
"but parameter 'light_version' is not enabled")
|
||||||
|
self.eynollah = Eynollah(
|
||||||
|
self.resolve_resource(self.parameter['models']),
|
||||||
|
logger=self.logger,
|
||||||
|
allow_enhancement=self.parameter['allow_enhancement'],
|
||||||
|
curved_line=self.parameter['curved_line'],
|
||||||
|
right2left=self.parameter['right_to_left'],
|
||||||
|
ignore_page_extraction=self.parameter['ignore_page_extraction'],
|
||||||
|
light_version=self.parameter['light_version'],
|
||||||
|
textline_light=self.parameter['textline_light'],
|
||||||
|
full_layout=self.parameter['full_layout'],
|
||||||
|
allow_scaling=self.parameter['allow_scaling'],
|
||||||
|
headers_off=self.parameter['headers_off'],
|
||||||
|
tables=self.parameter['tables'],
|
||||||
|
override_dpi=self.parameter['dpi'],
|
||||||
|
# trick Eynollah to do init independent of an image
|
||||||
|
dir_in="."
|
||||||
|
)
|
||||||
|
self.eynollah.dir_in = None
|
||||||
|
self.eynollah.plotter = None
|
||||||
|
|
||||||
|
def shutdown(self):
|
||||||
|
if hasattr(self, 'eynollah'):
|
||||||
|
del self.eynollah
|
||||||
|
|
||||||
def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
|
def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
|
||||||
"""
|
"""
|
||||||
|
@@ -60,27 +81,15 @@ class EynollahProcessor(Processor):
|
||||||
image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file
|
image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file
|
||||||
result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original
|
result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original
|
||||||
# FIXME: mask out already existing regions (incremental segmentation)
|
# FIXME: mask out already existing regions (incremental segmentation)
|
||||||
eynollah = Eynollah(
|
self.eynollah.image_filename = image_filename
|
||||||
self.resolve_resource(self.parameter['models']),
|
self.eynollah._imgs = self.eynollah._cache_images(
|
||||||
logger=self.logger,
|
|
||||||
allow_enhancement=self.parameter['allow_enhancement'],
|
|
||||||
curved_line=self.parameter['curved_line'],
|
|
||||||
right2left=self.parameter['right_to_left'],
|
|
||||||
ignore_page_extraction=self.parameter['ignore_page_extraction'],
|
|
||||||
light_version=self.parameter['light_version'],
|
|
||||||
textline_light=self.parameter['textline_light'],
|
|
||||||
full_layout=self.parameter['full_layout'],
|
|
||||||
allow_scaling=self.parameter['allow_scaling'],
|
|
||||||
headers_off=self.parameter['headers_off'],
|
|
||||||
tables=self.parameter['tables'],
|
|
||||||
override_dpi=self.parameter['dpi'],
|
|
||||||
pcgts=pcgts,
|
|
||||||
image_filename=image_filename,
|
|
||||||
image_pil=page_image
|
image_pil=page_image
|
||||||
)
|
)
|
||||||
if self.models is not None:
|
self.eynollah.writer = EynollahXmlWriter(
|
||||||
# reuse loaded models from previous page
|
dir_out=None,
|
||||||
eynollah.models = self.models
|
image_filename=image_filename,
|
||||||
eynollah.run()
|
curved_line=self.eynollah.curved_line,
|
||||||
self.models = eynollah.models
|
textline_light=self.eynollah.textline_light,
|
||||||
|
pcgts=pcgts)
|
||||||
|
self.eynollah.run()
|
||||||
return result
|
return result
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue