From 4a13781ef49cd964accabb41b583cd4083ce0293 Mon Sep 17 00:00:00 2001 From: kba Date: Fri, 23 Aug 2024 18:32:29 +0200 Subject: [PATCH] class Eynollah: add typing, consistent interface in CLI and OCR-D CLI --- qurator/eynollah/cli.py | 5 ++-- qurator/eynollah/eynollah.py | 58 +++++++++++++++++++----------------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 822db18..99bf5ac 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -1,6 +1,6 @@ import sys import click -from ocrd_utils import initLogging, setOverrideLogLevel +from ocrd_utils import getLogger, initLogging, setOverrideLogLevel from qurator.eynollah.eynollah import Eynollah @@ -176,10 +176,11 @@ def main( print('Error: You used -tll to enable light textline detection but -light is not enabled') sys.exit(1) eynollah = Eynollah( + model, + getLogger('Eynollah'), image_filename=image, dir_out=out, dir_in=dir_in, - dir_models=model, dir_of_cropped_images=save_images, dir_of_layout=save_layout, dir_of_deskewed=save_deskewed, diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 7f5561c..f80798b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -6,14 +6,18 @@ document layout analysis (segmentation) with output in PAGE-XML """ +from logging import Logger import math import os import sys import time +from typing import Optional import warnings from pathlib import Path from multiprocessing import Process, Queue, cpu_count import gc +from PIL.Image import Image +from ocrd import OcrdPage from ocrd_utils import getLogger import cv2 import numpy as np @@ -142,32 +146,32 @@ class PatchEncoder(layers.Layer): class Eynollah: def __init__( self, - dir_models, - image_filename=None, - image_pil=None, - image_filename_stem=None, - dir_out=None, - dir_in=None, - dir_of_cropped_images=None, - dir_of_layout=None, - dir_of_deskewed=None, - dir_of_all=None, - dir_save_page=None, - enable_plotting=False, - allow_enhancement=False, - curved_line=False, - textline_light=False, - full_layout=False, - tables=False, - right2left=False, - input_binary=False, - allow_scaling=False, - headers_off=False, - light_version=False, - ignore_page_extraction=False, - override_dpi=None, - logger=None, - pcgts=None, + dir_models : str, + logger : Logger, + image_filename : Optional[str] = None, + image_pil : Optional[Image] = None, + image_filename_stem : Optional[str] = None, + dir_out : Optional[str] = None, + dir_in : Optional[str] = None, + dir_of_cropped_images : Optional[str] = None, + dir_of_layout : Optional[str] = None, + dir_of_deskewed : Optional[str] = None, + dir_of_all : Optional[str] = None, + dir_save_page : Optional[str] = None, + enable_plotting : bool = False, + allow_enhancement : bool = False, + curved_line : bool = False, + textline_light : bool = False, + full_layout : bool = False, + tables : bool = False, + right2left : bool = False, + input_binary : bool = False, + allow_scaling : bool = False, + headers_off : bool = False, + light_version : bool = False, + ignore_page_extraction : bool = False, + override_dpi : Optional[int] = None, + pcgts : Optional[OcrdPage] = None, ): if not dir_in: if image_pil: @@ -213,7 +217,7 @@ class Eynollah: curved_line=self.curved_line, textline_light = self.textline_light, pcgts=pcgts) - self.logger = logger if logger else getLogger('eynollah') + self.logger = logger self.dir_models = dir_models self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"