make --model-basedir and --model-overrides top-level CLI options

This commit is contained in:
kba 2025-10-29 18:24:17 +01:00
parent b6f82c72b9
commit a913bdf7dc
18 changed files with 132 additions and 170 deletions

View file

@ -2,20 +2,36 @@ from dataclasses import dataclass
import sys import sys
import click import click
import logging import logging
from typing import Tuple, List
from ocrd_utils import initLogging, getLevelName, getLogger from ocrd_utils import initLogging, getLevelName, getLogger
from eynollah.eynollah import Eynollah
from eynollah.eynollah_ocr import Eynollah_ocr
from eynollah.sbb_binarize import SbbBinarizer
from eynollah.image_enhancer import Enhancer
from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
from eynollah.model_zoo import EynollahModelZoo from eynollah.model_zoo import EynollahModelZoo
from .cli_models import models_cli from .cli_models import models_cli
@dataclass
class EynollahCliCtx:
    """Object stored on the click context (``ctx.obj``) and shared with subcommands."""

    # Model zoo instance that subcommands read via ``ctx.obj.model_zoo``
    model_zoo: EynollahModelZoo
@click.group() @click.group()
def main(): @click.option(
pass "--model-basedir",
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
required=True,
)
@click.option(
"--model-overrides",
"-mv",
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
type=(str, str, str),
multiple=True,
)
@click.pass_context
def main(ctx, model_basedir, model_overrides):
# Initialize model zoo
ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides))
main.add_command(models_cli, 'models') main.add_command(models_cli, 'models')
@ -39,23 +55,17 @@ main.add_command(models_cli, 'models')
type=click.Path(exists=True, file_okay=False), type=click.Path(exists=True, file_okay=False),
required=True, required=True,
) )
@click.option(
"--model",
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option( @click.option(
"--log_level", "--log_level",
"-l", "-l",
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this", help="Override log level globally to this",
) )
@click.pass_context
def machine_based_reading_order(input, dir_in, out, model, log_level): def machine_based_reading_order(ctx, input, dir_in, out, log_level):
from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
assert bool(input) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." assert bool(input) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
orderer = machine_based_reading_order_on_layout(model) orderer = machine_based_reading_order_on_layout(model_zoo=ctx.obj.model_zoo)
if log_level: if log_level:
orderer.logger.setLevel(getLevelName(log_level)) orderer.logger.setLevel(getLevelName(log_level))
@ -67,7 +77,6 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
@main.command() @main.command()
@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') @click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.')
@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction')
@click.option( @click.option(
"--input-image", "--image", "--input-image", "--image",
"-i", "-i",
@ -92,7 +101,7 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
'--mode', '--mode',
type=click.Choice(['single', 'multi']), type=click.Choice(['single', 'multi']),
default='single', default='single',
help="Whether to use the (faster) single-model binarization or the (slightly better) multi-model binarization" help="Whether to use the (newer and faster) single-model binarization or the (slightly better) multi-model binarization"
) )
@click.option( @click.option(
"--log_level", "--log_level",
@ -100,17 +109,19 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this", help="Override log level globally to this",
) )
@click.pass_context
def binarization( def binarization(
ctx,
patches, patches,
model_dir,
input_image, input_image,
mode, mode,
dir_in, dir_in,
output, output,
log_level, log_level,
): ):
from eynollah.sbb_binarize import SbbBinarizer
assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
binarizer = SbbBinarizer(model_dir, mode=mode) binarizer = SbbBinarizer(model_zoo=ctx.obj.model_zoo, mode=mode)
if log_level: if log_level:
binarizer.log.setLevel(getLevelName(log_level)) binarizer.log.setLevel(getLevelName(log_level))
binarizer.run( binarizer.run(
@ -148,14 +159,6 @@ def binarization(
help="directory of input images (instead of --image)", help="directory of input images (instead of --image)",
type=click.Path(exists=True, file_okay=False), type=click.Path(exists=True, file_okay=False),
) )
@click.option(
"--model",
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option( @click.option(
"--num_col_upper", "--num_col_upper",
"-ncu", "-ncu",
@ -178,12 +181,13 @@ def binarization(
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this", help="Override log level globally to this",
) )
@click.pass_context
def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level): def enhancement(ctx, image, out, overwrite, dir_in, num_col_upper, num_col_lower, save_org_scale, log_level):
from eynollah.image_enhancer import Enhancer
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
initLogging() initLogging()
enhancer = Enhancer( enhancer = Enhancer(
model, model_zoo=ctx.obj.model_zoo,
num_col_upper=num_col_upper, num_col_upper=num_col_upper,
num_col_lower=num_col_lower, num_col_lower=num_col_lower,
save_org_scale=save_org_scale, save_org_scale=save_org_scale,
@ -223,22 +227,6 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
help="directory of input images (instead of --image)", help="directory of input images (instead of --image)",
type=click.Path(exists=True, file_okay=False), type=click.Path(exists=True, file_okay=False),
) )
@click.option(
"--model",
"-m",
'model_basedir',
help="directory of models",
type=click.Path(exists=True, file_okay=False),
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
required=True,
)
@click.option(
"--model_version",
"-mv",
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
type=(str, str, str),
multiple=True,
)
@click.option( @click.option(
"--save_images", "--save_images",
"-si", "-si",
@ -409,14 +397,13 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
is_flag=True, is_flag=True,
help="Setup a basic console logger", help="Setup a basic console logger",
) )
@click.pass_context
def layout( def layout(
ctx,
image, image,
out, out,
overwrite, overwrite,
dir_in, dir_in,
model_basedir,
model_version,
save_images, save_images,
save_layout, save_layout,
save_deskewed, save_deskewed,
@ -447,6 +434,7 @@ def layout(
log_level, log_level,
setup_logging, setup_logging,
): ):
from eynollah.eynollah import Eynollah
if setup_logging: if setup_logging:
console_handler = logging.StreamHandler(sys.stdout) console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO) console_handler.setLevel(logging.INFO)
@ -476,8 +464,7 @@ def layout(
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho" assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
eynollah = Eynollah( eynollah = Eynollah(
model_basedir, model_zoo=ctx.obj.model_zoo,
model_overrides=model_version,
extract_only_images=extract_only_images, extract_only_images=extract_only_images,
enable_plotting=enable_plotting, enable_plotting=enable_plotting,
allow_enhancement=allow_enhancement, allow_enhancement=allow_enhancement,
@ -559,17 +546,6 @@ def layout(
help="overwrite (instead of skipping) if output xml exists", help="overwrite (instead of skipping) if output xml exists",
is_flag=True, is_flag=True,
) )
@click.option(
"--model",
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--model_name",
help="Specific model file path to use for OCR",
type=click.Path(exists=True, file_okay=False),
)
@click.option( @click.option(
"--tr_ocr", "--tr_ocr",
"-trocr/-notrocr", "-trocr/-notrocr",
@ -609,20 +585,36 @@ def layout(
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this", help="Override log level globally to this",
) )
@click.pass_context
def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): def ocr(
ctx,
image,
dir_in,
dir_in_bin,
dir_xmls,
out,
dir_out_image_text,
overwrite,
tr_ocr,
export_textline_images_and_text,
do_not_mask_with_textline_contour,
batch_size,
dataset_abbrevation,
min_conf_value_of_textline_text,
log_level,
):
from eynollah.eynollah_ocr import Eynollah_ocr
initLogging() initLogging()
assert bool(model) != bool(model_name), "Either -m (model directory) or --model_name (specific model name) must be provided."
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr" assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m" # FIXME: refactor: move export_textline_images_and_text out of eynollah.py
# assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs" assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib" assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit" assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both." assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both."
eynollah_ocr = Eynollah_ocr( eynollah_ocr = Eynollah_ocr(
dir_models=model, model_zoo=ctx.obj.model_zoo,
model_name=model_name,
tr_ocr=tr_ocr, tr_ocr=tr_ocr,
export_textline_images_and_text=export_textline_images_and_text, export_textline_images_and_text=export_textline_images_and_text,
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour, do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,

View file

@ -6,30 +6,7 @@ import click
from eynollah.model_zoo.default_specs import MODELS_VERSION from eynollah.model_zoo.default_specs import MODELS_VERSION
from .model_zoo import EynollahModelZoo from .model_zoo import EynollahModelZoo
@dataclass()
class EynollahCliCtx:
model_zoo: EynollahModelZoo
@click.group() @click.group()
@click.pass_context
@click.option(
"--model",
"-m",
'model_basedir',
help="directory of models",
type=click.Path(exists=True, file_okay=False),
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
required=True,
)
@click.option(
"--model-overrides",
"-mv",
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
type=(str, str, str),
multiple=True,
)
def models_cli( def models_cli(
ctx, ctx,
model_basedir: str, model_basedir: str,
@ -38,7 +15,7 @@ def models_cli(
""" """
Organize models for the various runners in eynollah. Organize models for the various runners in eynollah.
""" """
ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides)) assert ctx.obj.model_zoo
@models_cli.command('list') @models_cli.command('list')

View file

@ -138,8 +138,8 @@ num_patches =21*21#14*14#28*28#14*14#28*28
class Eynollah: class Eynollah:
def __init__( def __init__(
self, self,
dir_models : str, *,
model_overrides: List[Tuple[str, str, str]] = [], model_zoo: EynollahModelZoo,
extract_only_images : bool =False, extract_only_images : bool =False,
enable_plotting : bool = False, enable_plotting : bool = False,
allow_enhancement : bool = False, allow_enhancement : bool = False,
@ -164,7 +164,7 @@ class Eynollah:
skip_layout_and_reading_order : bool = False, skip_layout_and_reading_order : bool = False,
): ):
self.logger = getLogger('eynollah') self.logger = getLogger('eynollah')
self.model_zoo = EynollahModelZoo(basedir=dir_models) self.model_zoo = model_zoo
self.plotter = None self.plotter = None
if skip_layout_and_reading_order: if skip_layout_and_reading_order:
@ -231,12 +231,10 @@ class Eynollah:
self.logger.warning("no GPU device available") self.logger.warning("no GPU device available")
self.logger.info("Loading models...") self.logger.info("Loading models...")
self.setup_models(*model_overrides) self.setup_models()
self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)") self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")
def setup_models(self, *model_overrides: Tuple[str, str, str]): def setup_models(self):
# override defaults from CLI
self.model_zoo.override_models(*model_overrides)
# load models, depending on modes # load models, depending on modes
# (note: loading too many models can cause OOM on GPU/CUDA, # (note: loading too many models can cause OOM on GPU/CUDA,

View file

@ -51,8 +51,8 @@ except ImportError:
class Eynollah_ocr: class Eynollah_ocr:
def __init__( def __init__(
self, self,
dir_models, *,
model_name=None, model_zoo: EynollahModelZoo,
dir_xmls=None, dir_xmls=None,
tr_ocr=False, tr_ocr=False,
batch_size: Optional[int]=None, batch_size: Optional[int]=None,
@ -70,7 +70,7 @@ class Eynollah_ocr:
# prefix or dataset # prefix or dataset
self.pref_of_dataset = pref_of_dataset self.pref_of_dataset = pref_of_dataset
self.logger = logger if logger else getLogger('eynollah.ocr') self.logger = logger if logger else getLogger('eynollah.ocr')
self.model_zoo = EynollahModelZoo(basedir=dir_models) self.model_zoo = model_zoo
# TODO: Properly document what 'export_textline_images_and_text' is about # TODO: Properly document what 'export_textline_images_and_text' is about
if export_textline_images_and_text: if export_textline_images_and_text:

View file

@ -32,7 +32,8 @@ KERNEL = np.ones((5, 5), np.uint8)
class Enhancer: class Enhancer:
def __init__( def __init__(
self, self,
dir_models : str, *,
model_zoo: EynollahModelZoo,
num_col_upper : Optional[int] = None, num_col_upper : Optional[int] = None,
num_col_lower : Optional[int] = None, num_col_lower : Optional[int] = None,
save_org_scale : bool = False, save_org_scale : bool = False,
@ -51,7 +52,7 @@ class Enhancer:
self.num_col_lower = num_col_lower self.num_col_lower = num_col_lower
self.logger = logger if logger else getLogger('eynollah.enhance') self.logger = logger if logger else getLogger('eynollah.enhance')
self.model_zoo = EynollahModelZoo(basedir=dir_models) self.model_zoo = model_zoo
for v in ['binarization', 'enhancement', 'col_classifier', 'page']: for v in ['binarization', 'enhancement', 'col_classifier', 'page']:
self.model_zoo.load_model(v) self.model_zoo.load_model(v)

View file

@ -32,12 +32,12 @@ KERNEL = np.ones((5, 5), np.uint8)
class machine_based_reading_order_on_layout: class machine_based_reading_order_on_layout:
def __init__( def __init__(
self, self,
dir_models : str, *,
model_zoo: EynollahModelZoo,
logger : Optional[Logger] = None, logger : Optional[Logger] = None,
): ):
self.logger = logger if logger else getLogger('mbreorder') self.logger = logger if logger else getLogger('mbreorder')
self.dir_models = dir_models self.model_zoo = model_zoo
self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"
try: try:
for device in tf.config.list_physical_devices('GPU'): for device in tf.config.list_physical_devices('GPU'):
@ -45,7 +45,6 @@ class machine_based_reading_order_on_layout:
except: except:
self.logger.warning("no GPU device available") self.logger.warning("no GPU device available")
self.model_zoo = EynollahModelZoo(basedir=dir_models)
self.model_zoo.load_model('reading_order') self.model_zoo.load_model('reading_order')
# FIXME: light_version is always true, no need for checks in the code # FIXME: light_version is always true, no need for checks in the code
self.light_version = True self.light_version = True

View file

@ -1,7 +1,4 @@
__all__ = [ __all__ = [
'EynollahModelZoo', 'EynollahModelZoo',
'KerasModel',
'TrOCRProcessor',
'VisionEncoderDecoderModel',
] ]
from .model_zoo import EynollahModelZoo, KerasModel, TrOCRProcessor, VisionEncoderDecoderModel from .model_zoo import EynollahModelZoo

View file

@ -4,11 +4,13 @@ from copy import deepcopy
from pathlib import Path from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type, Union from typing import Dict, List, Optional, Tuple, Type, Union
from ocrd_utils import tf_disable_interactive_logs
tf_disable_interactive_logs()
from keras.layers import StringLookup from keras.layers import StringLookup
from keras.models import Model as KerasModel from keras.models import Model as KerasModel
from keras.models import load_model from keras.models import load_model
from tabulate import tabulate from tabulate import tabulate
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ..patch_encoder import PatchEncoder, Patches from ..patch_encoder import PatchEncoder, Patches
from .specs import EynollahModelSpecSet from .specs import EynollahModelSpecSet
from .default_specs import DEFAULT_MODEL_SPECS from .default_specs import DEFAULT_MODEL_SPECS
@ -102,6 +104,7 @@ class EynollahModelZoo:
elif model_category == 'characters': elif model_category == 'characters':
model = self._load_characters() model = self._load_characters()
elif model_category == 'trocr_processor': elif model_category == 'trocr_processor':
from transformers import TrOCRProcessor
model = TrOCRProcessor.from_pretrained(model_path) model = TrOCRProcessor.from_pretrained(model_path)
else: else:
try: try:
@ -128,7 +131,10 @@ class EynollahModelZoo:
""" """
ocr_model_dir = self.model_path('ocr', variant) ocr_model_dir = self.model_path('ocr', variant)
if variant == 'tr': if variant == 'tr':
return VisionEncoderDecoderModel.from_pretrained(ocr_model_dir) from transformers import VisionEncoderDecoderModel
ret = VisionEncoderDecoderModel.from_pretrained(ocr_model_dir)
assert isinstance(ret, VisionEncoderDecoderModel)
return ret
else: else:
ocr_model = load_model(ocr_model_dir, compile=False) ocr_model = load_model(ocr_model_dir, compile=False)
assert isinstance(ocr_model, KerasModel) assert isinstance(ocr_model, KerasModel)

View file

@ -1,6 +1,8 @@
from typing import List, TypeVar, Union from typing import List, TypeVar, Union
from keras.models import Model as KerasModel from keras.models import Model as KerasModel
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List] # NOTE: Creating an actual union type requires loading transformers which is expensive and error-prone
# from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List]
AnyModel = object
T = TypeVar('T') T = TypeVar('T')

View file

@ -24,11 +24,11 @@ def resize_image(img_in, input_height, input_width):
class SbbBinarizer: class SbbBinarizer:
def __init__(self, model_dir: str, mode: str, logger=None): def __init__(self, *, model_zoo: EynollahModelZoo, mode: str, logger=None):
if mode not in ('single', 'multi'): if mode not in ('single', 'multi'):
raise ValueError(f"'mode' must be either 'multi' or 'single', not {mode}") raise ValueError(f"'mode' must be either 'multi' or 'single', not {mode}")
self.log = logger if logger else logging.getLogger('eynollah.binarization') self.log = logger if logger else logging.getLogger('eynollah.binarization')
self.model_zoo = EynollahModelZoo(basedir=model_dir) self.model_zoo = model_zoo
self.models = self.setup_models(mode) self.models = self.setup_models(mode)
self.session = self.start_new_session() self.session = self.start_new_session()

View file

@ -1,15 +1,17 @@
from typing import List from typing import List
from click import Command
import pytest import pytest
import logging import logging
from click.testing import CliRunner, Result from click.testing import CliRunner, Result
from eynollah.cli import main as eynollah_cli
@pytest.fixture @pytest.fixture
def run_eynollah_ok_and_check_logs( def run_eynollah_ok_and_check_logs(
pytestconfig, pytestconfig,
caplog, caplog,
model_dir, model_dir,
eynollah_subcommands,
eynollah_log_filter, eynollah_log_filter,
): ):
""" """
@ -18,14 +20,23 @@ def run_eynollah_ok_and_check_logs(
every fragment in `expected_logs` every fragment in `expected_logs`
""" """
def _run_click_ok_logs(cli: Command, args: List[str], expected_logs: List[str]) -> Result: def _run_click_ok_logs(
args = ['-m', model_dir] + args subcommand: 'str',
args: List[str],
expected_logs: List[str],
) -> Result:
assert subcommand in eynollah_subcommands, f'subcommand {subcommand} must be one of {eynollah_subcommands}'
args = [
'-m', model_dir,
subcommand,
*args
]
if pytestconfig.getoption('verbose') > 0: if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG']) args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO) caplog.set_level(logging.INFO)
runner = CliRunner() runner = CliRunner()
with caplog.filtering(eynollah_log_filter): with caplog.filtering(eynollah_log_filter):
result = runner.invoke(cli, args, catch_exceptions=False) result = runner.invoke(eynollah_cli, args, catch_exceptions=False)
assert result.exit_code == 0, result.stdout assert result.exit_code == 0, result.stdout
if expected_logs: if expected_logs:
logmsgs = [logrec.message for logrec in caplog.records] logmsgs = [logrec.message for logrec in caplog.records]

View file

@ -1,10 +1,5 @@
import pytest import pytest
from PIL import Image from PIL import Image
from eynollah.cli import (
binarization as binarization_cli,
)
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS
@pytest.mark.parametrize( @pytest.mark.parametrize(
"options", "options",
@ -21,7 +16,7 @@ def test_run_eynollah_binarization_filename(
infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
binarization_cli, 'binarization',
[ [
'-i', str(infile), '-i', str(infile),
'-o', str(outfile), '-o', str(outfile),
@ -45,7 +40,7 @@ def test_run_eynollah_binarization_directory(
): ):
outdir = tmp_path outdir = tmp_path
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
binarization_cli, 'binarization',
[ [
'-di', str(resources_dir), '-di', str(resources_dir),
'-o', str(outdir), '-o', str(outdir),

View file

@ -1,10 +1,5 @@
import pytest import pytest
from PIL import Image from PIL import Image
from eynollah.cli import (
enhancement as enhancement_cli,
)
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS
@pytest.mark.parametrize( @pytest.mark.parametrize(
"options", "options",
@ -21,7 +16,7 @@ def test_run_eynollah_enhancement_filename(
infile = resources_dir / 'kant_aufklaerung_1784_0020.tif' infile = resources_dir / 'kant_aufklaerung_1784_0020.tif'
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
enhancement_cli, 'enhancement',
[ [
'-i', str(infile), '-i', str(infile),
'-o', str(outfile.parent), '-o', str(outfile.parent),
@ -44,7 +39,7 @@ def test_run_eynollah_enhancement_directory(
): ):
outdir = tmp_path outdir = tmp_path
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
enhancement_cli, 'enhancement',
[ [
'-di', str(resources_dir), '-di', str(resources_dir),
'-o', str(outdir), '-o', str(outdir),

View file

@ -1,7 +1,4 @@
import pytest import pytest
from eynollah.cli import (
layout as layout_cli,
)
from ocrd_modelfactory import page_from_file from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS from ocrd_models.constants import NAMESPACES as NS
@ -30,11 +27,10 @@ def test_run_eynollah_layout_filename(
resources_dir, resources_dir,
options, options,
): ):
outdir = tmp_path
infile = resources_dir / 'kant_aufklaerung_1784_0020.tif' infile = resources_dir / 'kant_aufklaerung_1784_0020.tif'
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
layout_cli, 'layout',
[ [
'-i', str(infile), '-i', str(infile),
'-o', str(outfile.parent), '-o', str(outfile.parent),
@ -68,7 +64,7 @@ def test_run_eynollah_layout_filename2(
infile = resources_dir / 'euler_rechenkunst01_1738_0025.tif' infile = resources_dir / 'euler_rechenkunst01_1738_0025.tif'
outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml' outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml'
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
layout_cli, 'layout',
[ [
'-i', str(infile), '-i', str(infile),
'-o', str(outfile.parent), '-o', str(outfile.parent),
@ -96,7 +92,7 @@ def test_run_eynollah_layout_directory(
): ):
outdir = tmp_path outdir = tmp_path
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
layout_cli, 'layout',
[ [
'-di', str(resources_dir), '-di', str(resources_dir),
'-o', str(outdir), '-o', str(outdir),

View file

@ -1,11 +1,6 @@
from ocrd_modelfactory import page_from_file from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS from ocrd_models.constants import NAMESPACES as NS
from eynollah.cli import (
machine_based_reading_order as mbreorder_cli,
)
def test_run_eynollah_mbreorder_filename( def test_run_eynollah_mbreorder_filename(
tmp_path, tmp_path,
resources_dir, resources_dir,
@ -14,7 +9,7 @@ def test_run_eynollah_mbreorder_filename(
infile = resources_dir / 'kant_aufklaerung_1784_0020.xml' infile = resources_dir / 'kant_aufklaerung_1784_0020.xml'
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
mbreorder_cli, 'machine-based-reading-order',
[ [
'-i', str(infile), '-i', str(infile),
'-o', str(outfile.parent), '-o', str(outfile.parent),
@ -37,10 +32,9 @@ def test_run_eynollah_mbreorder_directory(
resources_dir, resources_dir,
run_eynollah_ok_and_check_logs, run_eynollah_ok_and_check_logs,
): ):
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
outdir = tmp_path outdir = tmp_path
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
mbreorder_cli, 'machine-based-reading-order',
[ [
'-di', str(resources_dir), '-di', str(resources_dir),
'-o', str(outdir), '-o', str(outdir),

View file

@ -1,7 +1,4 @@
import pytest import pytest
from eynollah.cli import (
ocr as ocr_cli,
)
from ocrd_modelfactory import page_from_file from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS from ocrd_models.constants import NAMESPACES as NS
@ -26,7 +23,7 @@ def test_run_eynollah_ocr_filename(
if "-doit" in options: if "-doit" in options:
options.insert(options.index("-doit") + 1, str(outrenderfile.parent)) options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
ocr_cli, 'ocr',
[ [
'-i', str(infile), '-i', str(infile),
'-dx', str(infile.parent), '-dx', str(infile.parent),
@ -53,7 +50,7 @@ def test_run_eynollah_ocr_directory(
): ):
outdir = tmp_path outdir = tmp_path
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
ocr_cli, 'ocr',
[ [
'-di', str(resources_dir), '-di', str(resources_dir),
'-dx', str(resources_dir), '-dx', str(resources_dir),

View file

@ -1,10 +0,0 @@
import pytest
from PIL import Image
from eynollah.cli import (
layout as layout_cli,
binarization as binarization_cli,
enhancement as enhancement_cli,
)
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS

View file

@ -23,3 +23,15 @@ def image_resources(resources_dir):
@pytest.fixture() @pytest.fixture()
def eynollah_log_filter(): def eynollah_log_filter():
return lambda logrec: logrec.name.startswith('eynollah') return lambda logrec: logrec.name.startswith('eynollah')
@pytest.fixture
def eynollah_subcommands():
    """Return the list of subcommand names accepted by the eynollah CLI test helpers.

    Every entry must be followed by a comma: adjacent string literals are
    implicitly concatenated by Python, so a missing comma silently merges two
    names into one bogus entry (e.g. 'machine-based-reading-ordermodels') and
    makes membership checks for both real names fail.
    """
    return [
        'binarization',
        'layout',
        'ocr',
        'enhancement',
        'machine-based-reading-order',
        'models',
    ]