mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-11-09 22:24:13 +01:00
make --model-basedir and --model-overrides top-level CLI options
This commit is contained in:
parent
b6f82c72b9
commit
a913bdf7dc
18 changed files with 132 additions and 170 deletions
|
|
@ -2,20 +2,36 @@ from dataclasses import dataclass
|
|||
import sys
|
||||
import click
|
||||
import logging
|
||||
from typing import Tuple, List
|
||||
from ocrd_utils import initLogging, getLevelName, getLogger
|
||||
from eynollah.eynollah import Eynollah
|
||||
from eynollah.eynollah_ocr import Eynollah_ocr
|
||||
from eynollah.sbb_binarize import SbbBinarizer
|
||||
from eynollah.image_enhancer import Enhancer
|
||||
from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
|
||||
from eynollah.model_zoo import EynollahModelZoo
|
||||
|
||||
from .cli_models import models_cli
|
||||
|
||||
@dataclass()
|
||||
class EynollahCliCtx:
|
||||
model_zoo: EynollahModelZoo
|
||||
|
||||
|
||||
@click.group()
|
||||
def main():
|
||||
pass
|
||||
@click.option(
|
||||
"--model-basedir",
|
||||
"-m",
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--model-overrides",
|
||||
"-mv",
|
||||
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
|
||||
type=(str, str, str),
|
||||
multiple=True,
|
||||
)
|
||||
@click.pass_context
|
||||
def main(ctx, model_basedir, model_overrides):
|
||||
# Initialize model zoo
|
||||
ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides))
|
||||
|
||||
main.add_command(models_cli, 'models')
|
||||
|
||||
|
|
@ -39,23 +55,17 @@ main.add_command(models_cli, 'models')
|
|||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--model",
|
||||
"-m",
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--log_level",
|
||||
"-l",
|
||||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
)
|
||||
|
||||
def machine_based_reading_order(input, dir_in, out, model, log_level):
|
||||
@click.pass_context
|
||||
def machine_based_reading_order(ctx, input, dir_in, out, log_level):
|
||||
from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
|
||||
assert bool(input) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
orderer = machine_based_reading_order_on_layout(model)
|
||||
orderer = machine_based_reading_order_on_layout(model_zoo=ctx.obj.model_zoo)
|
||||
if log_level:
|
||||
orderer.logger.setLevel(getLevelName(log_level))
|
||||
|
||||
|
|
@ -67,7 +77,6 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
|
|||
|
||||
@main.command()
|
||||
@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.')
|
||||
@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction')
|
||||
@click.option(
|
||||
"--input-image", "--image",
|
||||
"-i",
|
||||
|
|
@ -92,7 +101,7 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
|
|||
'--mode',
|
||||
type=click.Choice(['single', 'multi']),
|
||||
default='single',
|
||||
help="Whether to use the (faster) single-model binarization or the (slightly better) multi-model binarization"
|
||||
help="Whether to use the (newer and faster) single-model binarization or the (slightly better) multi-model binarization"
|
||||
)
|
||||
@click.option(
|
||||
"--log_level",
|
||||
|
|
@ -100,17 +109,19 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
|
|||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
)
|
||||
@click.pass_context
|
||||
def binarization(
|
||||
ctx,
|
||||
patches,
|
||||
model_dir,
|
||||
input_image,
|
||||
mode,
|
||||
dir_in,
|
||||
output,
|
||||
log_level,
|
||||
):
|
||||
from eynollah.sbb_binarize import SbbBinarizer
|
||||
assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
binarizer = SbbBinarizer(model_dir, mode=mode)
|
||||
binarizer = SbbBinarizer(model_zoo=ctx.obj.model_zoo, mode=mode)
|
||||
if log_level:
|
||||
binarizer.log.setLevel(getLevelName(log_level))
|
||||
binarizer.run(
|
||||
|
|
@ -148,14 +159,6 @@ def binarization(
|
|||
help="directory of input images (instead of --image)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--model",
|
||||
"-m",
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
|
||||
@click.option(
|
||||
"--num_col_upper",
|
||||
"-ncu",
|
||||
|
|
@ -178,12 +181,13 @@ def binarization(
|
|||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
)
|
||||
|
||||
def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level):
|
||||
@click.pass_context
|
||||
def enhancement(ctx, image, out, overwrite, dir_in, num_col_upper, num_col_lower, save_org_scale, log_level):
|
||||
from eynollah.image_enhancer import Enhancer
|
||||
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
initLogging()
|
||||
enhancer = Enhancer(
|
||||
model,
|
||||
model_zoo=ctx.obj.model_zoo,
|
||||
num_col_upper=num_col_upper,
|
||||
num_col_lower=num_col_lower,
|
||||
save_org_scale=save_org_scale,
|
||||
|
|
@ -223,22 +227,6 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
|
|||
help="directory of input images (instead of --image)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--model",
|
||||
"-m",
|
||||
'model_basedir',
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--model_version",
|
||||
"-mv",
|
||||
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
|
||||
type=(str, str, str),
|
||||
multiple=True,
|
||||
)
|
||||
@click.option(
|
||||
"--save_images",
|
||||
"-si",
|
||||
|
|
@ -409,14 +397,13 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
|
|||
is_flag=True,
|
||||
help="Setup a basic console logger",
|
||||
)
|
||||
|
||||
@click.pass_context
|
||||
def layout(
|
||||
ctx,
|
||||
image,
|
||||
out,
|
||||
overwrite,
|
||||
dir_in,
|
||||
model_basedir,
|
||||
model_version,
|
||||
save_images,
|
||||
save_layout,
|
||||
save_deskewed,
|
||||
|
|
@ -447,6 +434,7 @@ def layout(
|
|||
log_level,
|
||||
setup_logging,
|
||||
):
|
||||
from eynollah.eynollah import Eynollah
|
||||
if setup_logging:
|
||||
console_handler = logging.StreamHandler(sys.stdout)
|
||||
console_handler.setLevel(logging.INFO)
|
||||
|
|
@ -476,8 +464,7 @@ def layout(
|
|||
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
|
||||
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
eynollah = Eynollah(
|
||||
model_basedir,
|
||||
model_overrides=model_version,
|
||||
model_zoo=ctx.obj.model_zoo,
|
||||
extract_only_images=extract_only_images,
|
||||
enable_plotting=enable_plotting,
|
||||
allow_enhancement=allow_enhancement,
|
||||
|
|
@ -559,17 +546,6 @@ def layout(
|
|||
help="overwrite (instead of skipping) if output xml exists",
|
||||
is_flag=True,
|
||||
)
|
||||
@click.option(
|
||||
"--model",
|
||||
"-m",
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--model_name",
|
||||
help="Specific model file path to use for OCR",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--tr_ocr",
|
||||
"-trocr/-notrocr",
|
||||
|
|
@ -609,20 +585,36 @@ def layout(
|
|||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
)
|
||||
|
||||
def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
|
||||
@click.pass_context
|
||||
def ocr(
|
||||
ctx,
|
||||
image,
|
||||
dir_in,
|
||||
dir_in_bin,
|
||||
dir_xmls,
|
||||
out,
|
||||
dir_out_image_text,
|
||||
overwrite,
|
||||
tr_ocr,
|
||||
export_textline_images_and_text,
|
||||
do_not_mask_with_textline_contour,
|
||||
batch_size,
|
||||
dataset_abbrevation,
|
||||
min_conf_value_of_textline_text,
|
||||
log_level,
|
||||
):
|
||||
from eynollah.eynollah_ocr import Eynollah_ocr
|
||||
initLogging()
|
||||
|
||||
assert bool(model) != bool(model_name), "Either -m (model directory) or --model_name (specific model name) must be provided."
|
||||
|
||||
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
|
||||
assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
|
||||
# FIXME: refactor: move export_textline_images_and_text out of eynollah.py
|
||||
# assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
|
||||
assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
|
||||
assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
|
||||
assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
|
||||
assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both."
|
||||
eynollah_ocr = Eynollah_ocr(
|
||||
dir_models=model,
|
||||
model_name=model_name,
|
||||
model_zoo=ctx.obj.model_zoo,
|
||||
tr_ocr=tr_ocr,
|
||||
export_textline_images_and_text=export_textline_images_and_text,
|
||||
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
|
||||
|
|
|
|||
|
|
@ -6,30 +6,7 @@ import click
|
|||
from eynollah.model_zoo.default_specs import MODELS_VERSION
|
||||
from .model_zoo import EynollahModelZoo
|
||||
|
||||
|
||||
@dataclass()
|
||||
class EynollahCliCtx:
|
||||
model_zoo: EynollahModelZoo
|
||||
|
||||
|
||||
@click.group()
|
||||
@click.pass_context
|
||||
@click.option(
|
||||
"--model",
|
||||
"-m",
|
||||
'model_basedir',
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--model-overrides",
|
||||
"-mv",
|
||||
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
|
||||
type=(str, str, str),
|
||||
multiple=True,
|
||||
)
|
||||
def models_cli(
|
||||
ctx,
|
||||
model_basedir: str,
|
||||
|
|
@ -38,7 +15,7 @@ def models_cli(
|
|||
"""
|
||||
Organize models for the various runners in eynollah.
|
||||
"""
|
||||
ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides))
|
||||
assert ctx.obj.model_zoo
|
||||
|
||||
|
||||
@models_cli.command('list')
|
||||
|
|
|
|||
|
|
@ -138,8 +138,8 @@ num_patches =21*21#14*14#28*28#14*14#28*28
|
|||
class Eynollah:
|
||||
def __init__(
|
||||
self,
|
||||
dir_models : str,
|
||||
model_overrides: List[Tuple[str, str, str]] = [],
|
||||
*,
|
||||
model_zoo: EynollahModelZoo,
|
||||
extract_only_images : bool =False,
|
||||
enable_plotting : bool = False,
|
||||
allow_enhancement : bool = False,
|
||||
|
|
@ -164,7 +164,7 @@ class Eynollah:
|
|||
skip_layout_and_reading_order : bool = False,
|
||||
):
|
||||
self.logger = getLogger('eynollah')
|
||||
self.model_zoo = EynollahModelZoo(basedir=dir_models)
|
||||
self.model_zoo = model_zoo
|
||||
self.plotter = None
|
||||
|
||||
if skip_layout_and_reading_order:
|
||||
|
|
@ -231,12 +231,10 @@ class Eynollah:
|
|||
self.logger.warning("no GPU device available")
|
||||
|
||||
self.logger.info("Loading models...")
|
||||
self.setup_models(*model_overrides)
|
||||
self.setup_models()
|
||||
self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")
|
||||
|
||||
def setup_models(self, *model_overrides: Tuple[str, str, str]):
|
||||
# override defaults from CLI
|
||||
self.model_zoo.override_models(*model_overrides)
|
||||
def setup_models(self):
|
||||
|
||||
# load models, depending on modes
|
||||
# (note: loading too many models can cause OOM on GPU/CUDA,
|
||||
|
|
|
|||
|
|
@ -51,8 +51,8 @@ except ImportError:
|
|||
class Eynollah_ocr:
|
||||
def __init__(
|
||||
self,
|
||||
dir_models,
|
||||
model_name=None,
|
||||
*,
|
||||
model_zoo: EynollahModelZoo,
|
||||
dir_xmls=None,
|
||||
tr_ocr=False,
|
||||
batch_size: Optional[int]=None,
|
||||
|
|
@ -70,7 +70,7 @@ class Eynollah_ocr:
|
|||
# prefix or dataset
|
||||
self.pref_of_dataset = pref_of_dataset
|
||||
self.logger = logger if logger else getLogger('eynollah.ocr')
|
||||
self.model_zoo = EynollahModelZoo(basedir=dir_models)
|
||||
self.model_zoo = model_zoo
|
||||
|
||||
# TODO: Properly document what 'export_textline_images_and_text' is about
|
||||
if export_textline_images_and_text:
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@ KERNEL = np.ones((5, 5), np.uint8)
|
|||
class Enhancer:
|
||||
def __init__(
|
||||
self,
|
||||
dir_models : str,
|
||||
*,
|
||||
model_zoo: EynollahModelZoo,
|
||||
num_col_upper : Optional[int] = None,
|
||||
num_col_lower : Optional[int] = None,
|
||||
save_org_scale : bool = False,
|
||||
|
|
@ -51,7 +52,7 @@ class Enhancer:
|
|||
self.num_col_lower = num_col_lower
|
||||
|
||||
self.logger = logger if logger else getLogger('eynollah.enhance')
|
||||
self.model_zoo = EynollahModelZoo(basedir=dir_models)
|
||||
self.model_zoo = model_zoo
|
||||
for v in ['binarization', 'enhancement', 'col_classifier', 'page']:
|
||||
self.model_zoo.load_model(v)
|
||||
|
||||
|
|
|
|||
|
|
@ -32,12 +32,12 @@ KERNEL = np.ones((5, 5), np.uint8)
|
|||
class machine_based_reading_order_on_layout:
|
||||
def __init__(
|
||||
self,
|
||||
dir_models : str,
|
||||
*,
|
||||
model_zoo: EynollahModelZoo,
|
||||
logger : Optional[Logger] = None,
|
||||
):
|
||||
self.logger = logger if logger else getLogger('mbreorder')
|
||||
self.dir_models = dir_models
|
||||
self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"
|
||||
self.model_zoo = model_zoo
|
||||
|
||||
try:
|
||||
for device in tf.config.list_physical_devices('GPU'):
|
||||
|
|
@ -45,7 +45,6 @@ class machine_based_reading_order_on_layout:
|
|||
except:
|
||||
self.logger.warning("no GPU device available")
|
||||
|
||||
self.model_zoo = EynollahModelZoo(basedir=dir_models)
|
||||
self.model_zoo.load_model('reading_order')
|
||||
# FIXME: light_version is always true, no need for checks in the code
|
||||
self.light_version = True
|
||||
|
|
|
|||
|
|
@ -1,7 +1,4 @@
|
|||
__all__ = [
|
||||
'EynollahModelZoo',
|
||||
'KerasModel',
|
||||
'TrOCRProcessor',
|
||||
'VisionEncoderDecoderModel',
|
||||
]
|
||||
from .model_zoo import EynollahModelZoo, KerasModel, TrOCRProcessor, VisionEncoderDecoderModel
|
||||
from .model_zoo import EynollahModelZoo
|
||||
|
|
|
|||
|
|
@ -4,11 +4,13 @@ from copy import deepcopy
|
|||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple, Type, Union
|
||||
|
||||
from ocrd_utils import tf_disable_interactive_logs
|
||||
tf_disable_interactive_logs()
|
||||
|
||||
from keras.layers import StringLookup
|
||||
from keras.models import Model as KerasModel
|
||||
from keras.models import load_model
|
||||
from tabulate import tabulate
|
||||
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||
from ..patch_encoder import PatchEncoder, Patches
|
||||
from .specs import EynollahModelSpecSet
|
||||
from .default_specs import DEFAULT_MODEL_SPECS
|
||||
|
|
@ -102,6 +104,7 @@ class EynollahModelZoo:
|
|||
elif model_category == 'characters':
|
||||
model = self._load_characters()
|
||||
elif model_category == 'trocr_processor':
|
||||
from transformers import TrOCRProcessor
|
||||
model = TrOCRProcessor.from_pretrained(model_path)
|
||||
else:
|
||||
try:
|
||||
|
|
@ -128,7 +131,10 @@ class EynollahModelZoo:
|
|||
"""
|
||||
ocr_model_dir = self.model_path('ocr', variant)
|
||||
if variant == 'tr':
|
||||
return VisionEncoderDecoderModel.from_pretrained(ocr_model_dir)
|
||||
from transformers import VisionEncoderDecoderModel
|
||||
ret = VisionEncoderDecoderModel.from_pretrained(ocr_model_dir)
|
||||
assert isinstance(ret, VisionEncoderDecoderModel)
|
||||
return ret
|
||||
else:
|
||||
ocr_model = load_model(ocr_model_dir, compile=False)
|
||||
assert isinstance(ocr_model, KerasModel)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
from typing import List, TypeVar, Union
|
||||
from keras.models import Model as KerasModel
|
||||
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||
|
||||
AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List]
|
||||
# NOTE: Creating an actual union type requires loading transformers which is expensive and error-prone
|
||||
# from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||
# AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List]
|
||||
AnyModel = object
|
||||
T = TypeVar('T')
|
||||
|
|
|
|||
|
|
@ -24,11 +24,11 @@ def resize_image(img_in, input_height, input_width):
|
|||
|
||||
class SbbBinarizer:
|
||||
|
||||
def __init__(self, model_dir: str, mode: str, logger=None):
|
||||
def __init__(self, *, model_zoo: EynollahModelZoo, mode: str, logger=None):
|
||||
if mode not in ('single', 'multi'):
|
||||
raise ValueError(f"'mode' must be either 'multi' or 'single', not {mode}")
|
||||
self.log = logger if logger else logging.getLogger('eynollah.binarization')
|
||||
self.model_zoo = EynollahModelZoo(basedir=model_dir)
|
||||
self.model_zoo = model_zoo
|
||||
self.models = self.setup_models(mode)
|
||||
self.session = self.start_new_session()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,17 @@
|
|||
from typing import List
|
||||
from click import Command
|
||||
import pytest
|
||||
import logging
|
||||
|
||||
from click.testing import CliRunner, Result
|
||||
from eynollah.cli import main as eynollah_cli
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def run_eynollah_ok_and_check_logs(
|
||||
pytestconfig,
|
||||
caplog,
|
||||
model_dir,
|
||||
eynollah_subcommands,
|
||||
eynollah_log_filter,
|
||||
):
|
||||
"""
|
||||
|
|
@ -18,14 +20,23 @@ def run_eynollah_ok_and_check_logs(
|
|||
every fragment in `expected_logs`
|
||||
"""
|
||||
|
||||
def _run_click_ok_logs(cli: Command, args: List[str], expected_logs: List[str]) -> Result:
|
||||
args = ['-m', model_dir] + args
|
||||
def _run_click_ok_logs(
|
||||
subcommand: 'str',
|
||||
args: List[str],
|
||||
expected_logs: List[str],
|
||||
) -> Result:
|
||||
assert subcommand in eynollah_subcommands, f'subcommand {subcommand} must be one of {eynollah_subcommands}'
|
||||
args = [
|
||||
'-m', model_dir,
|
||||
subcommand,
|
||||
*args
|
||||
]
|
||||
if pytestconfig.getoption('verbose') > 0:
|
||||
args.extend(['-l', 'DEBUG'])
|
||||
caplog.set_level(logging.INFO)
|
||||
runner = CliRunner()
|
||||
with caplog.filtering(eynollah_log_filter):
|
||||
result = runner.invoke(cli, args, catch_exceptions=False)
|
||||
result = runner.invoke(eynollah_cli, args, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
if expected_logs:
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
|
|
|
|||
|
|
@ -1,10 +1,5 @@
|
|||
import pytest
|
||||
from PIL import Image
|
||||
from eynollah.cli import (
|
||||
binarization as binarization_cli,
|
||||
)
|
||||
from ocrd_modelfactory import page_from_file
|
||||
from ocrd_models.constants import NAMESPACES as NS
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"options",
|
||||
|
|
@ -21,7 +16,7 @@ def test_run_eynollah_binarization_filename(
|
|||
infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
|
||||
run_eynollah_ok_and_check_logs(
|
||||
binarization_cli,
|
||||
'binarization',
|
||||
[
|
||||
'-i', str(infile),
|
||||
'-o', str(outfile),
|
||||
|
|
@ -45,7 +40,7 @@ def test_run_eynollah_binarization_directory(
|
|||
):
|
||||
outdir = tmp_path
|
||||
run_eynollah_ok_and_check_logs(
|
||||
binarization_cli,
|
||||
'binarization',
|
||||
[
|
||||
'-di', str(resources_dir),
|
||||
'-o', str(outdir),
|
||||
|
|
|
|||
|
|
@ -1,10 +1,5 @@
|
|||
import pytest
|
||||
from PIL import Image
|
||||
from eynollah.cli import (
|
||||
enhancement as enhancement_cli,
|
||||
)
|
||||
from ocrd_modelfactory import page_from_file
|
||||
from ocrd_models.constants import NAMESPACES as NS
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"options",
|
||||
|
|
@ -21,7 +16,7 @@ def test_run_eynollah_enhancement_filename(
|
|||
infile = resources_dir / 'kant_aufklaerung_1784_0020.tif'
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
|
||||
run_eynollah_ok_and_check_logs(
|
||||
enhancement_cli,
|
||||
'enhancement',
|
||||
[
|
||||
'-i', str(infile),
|
||||
'-o', str(outfile.parent),
|
||||
|
|
@ -44,7 +39,7 @@ def test_run_eynollah_enhancement_directory(
|
|||
):
|
||||
outdir = tmp_path
|
||||
run_eynollah_ok_and_check_logs(
|
||||
enhancement_cli,
|
||||
'enhancement',
|
||||
[
|
||||
'-di', str(resources_dir),
|
||||
'-o', str(outdir),
|
||||
|
|
|
|||
|
|
@ -1,7 +1,4 @@
|
|||
import pytest
|
||||
from eynollah.cli import (
|
||||
layout as layout_cli,
|
||||
)
|
||||
from ocrd_modelfactory import page_from_file
|
||||
from ocrd_models.constants import NAMESPACES as NS
|
||||
|
||||
|
|
@ -30,11 +27,10 @@ def test_run_eynollah_layout_filename(
|
|||
resources_dir,
|
||||
options,
|
||||
):
|
||||
outdir = tmp_path
|
||||
infile = resources_dir / 'kant_aufklaerung_1784_0020.tif'
|
||||
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
|
||||
run_eynollah_ok_and_check_logs(
|
||||
layout_cli,
|
||||
'layout',
|
||||
[
|
||||
'-i', str(infile),
|
||||
'-o', str(outfile.parent),
|
||||
|
|
@ -68,7 +64,7 @@ def test_run_eynollah_layout_filename2(
|
|||
infile = resources_dir / 'euler_rechenkunst01_1738_0025.tif'
|
||||
outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml'
|
||||
run_eynollah_ok_and_check_logs(
|
||||
layout_cli,
|
||||
'layout',
|
||||
[
|
||||
'-i', str(infile),
|
||||
'-o', str(outfile.parent),
|
||||
|
|
@ -96,7 +92,7 @@ def test_run_eynollah_layout_directory(
|
|||
):
|
||||
outdir = tmp_path
|
||||
run_eynollah_ok_and_check_logs(
|
||||
layout_cli,
|
||||
'layout',
|
||||
[
|
||||
'-di', str(resources_dir),
|
||||
'-o', str(outdir),
|
||||
|
|
|
|||
|
|
@ -1,11 +1,6 @@
|
|||
from ocrd_modelfactory import page_from_file
|
||||
from ocrd_models.constants import NAMESPACES as NS
|
||||
|
||||
from eynollah.cli import (
|
||||
machine_based_reading_order as mbreorder_cli,
|
||||
)
|
||||
|
||||
|
||||
def test_run_eynollah_mbreorder_filename(
|
||||
tmp_path,
|
||||
resources_dir,
|
||||
|
|
@ -14,7 +9,7 @@ def test_run_eynollah_mbreorder_filename(
|
|||
infile = resources_dir / 'kant_aufklaerung_1784_0020.xml'
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
|
||||
run_eynollah_ok_and_check_logs(
|
||||
mbreorder_cli,
|
||||
'machine-based-reading-order',
|
||||
[
|
||||
'-i', str(infile),
|
||||
'-o', str(outfile.parent),
|
||||
|
|
@ -37,10 +32,9 @@ def test_run_eynollah_mbreorder_directory(
|
|||
resources_dir,
|
||||
run_eynollah_ok_and_check_logs,
|
||||
):
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
|
||||
outdir = tmp_path
|
||||
run_eynollah_ok_and_check_logs(
|
||||
mbreorder_cli,
|
||||
'machine-based-reading-order',
|
||||
[
|
||||
'-di', str(resources_dir),
|
||||
'-o', str(outdir),
|
||||
|
|
|
|||
|
|
@ -1,7 +1,4 @@
|
|||
import pytest
|
||||
from eynollah.cli import (
|
||||
ocr as ocr_cli,
|
||||
)
|
||||
from ocrd_modelfactory import page_from_file
|
||||
from ocrd_models.constants import NAMESPACES as NS
|
||||
|
||||
|
|
@ -26,7 +23,7 @@ def test_run_eynollah_ocr_filename(
|
|||
if "-doit" in options:
|
||||
options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
|
||||
run_eynollah_ok_and_check_logs(
|
||||
ocr_cli,
|
||||
'ocr',
|
||||
[
|
||||
'-i', str(infile),
|
||||
'-dx', str(infile.parent),
|
||||
|
|
@ -53,7 +50,7 @@ def test_run_eynollah_ocr_directory(
|
|||
):
|
||||
outdir = tmp_path
|
||||
run_eynollah_ok_and_check_logs(
|
||||
ocr_cli,
|
||||
'ocr',
|
||||
[
|
||||
'-di', str(resources_dir),
|
||||
'-dx', str(resources_dir),
|
||||
|
|
|
|||
|
|
@ -1,10 +0,0 @@
|
|||
import pytest
|
||||
from PIL import Image
|
||||
from eynollah.cli import (
|
||||
layout as layout_cli,
|
||||
binarization as binarization_cli,
|
||||
enhancement as enhancement_cli,
|
||||
)
|
||||
from ocrd_modelfactory import page_from_file
|
||||
from ocrd_models.constants import NAMESPACES as NS
|
||||
|
||||
|
|
@ -23,3 +23,15 @@ def image_resources(resources_dir):
|
|||
@pytest.fixture()
def eynollah_log_filter():
    """Provide a predicate that accepts only log records emitted by eynollah loggers."""

    def _is_eynollah_record(logrec):
        # Match on the logger-name prefix, e.g. 'eynollah' or 'eynollah.ocr'.
        return logrec.name.startswith('eynollah')

    return _is_eynollah_record
|
||||
|
||||
@pytest.fixture
def eynollah_subcommands():
    """Return the subcommand names that the eynollah CLI test helper accepts."""
    # Each entry needs its own comma: adjacent string literals are concatenated
    # implicitly, which would merge the last two names into the single bogus
    # entry 'machine-based-reading-ordermodels'.
    return [
        'binarization',
        'layout',
        'ocr',
        'enhancement',
        'machine-based-reading-order',
        'models',
    ]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue