diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 595f0ee..9ae909f 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -2,20 +2,36 @@ from dataclasses import dataclass import sys import click import logging -from typing import Tuple, List from ocrd_utils import initLogging, getLevelName, getLogger -from eynollah.eynollah import Eynollah -from eynollah.eynollah_ocr import Eynollah_ocr -from eynollah.sbb_binarize import SbbBinarizer -from eynollah.image_enhancer import Enhancer -from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout from eynollah.model_zoo import EynollahModelZoo from .cli_models import models_cli +@dataclass() +class EynollahCliCtx: + model_zoo: EynollahModelZoo + + @click.group() -def main(): - pass +@click.option( + "--model-basedir", + "-m", + help="directory of models", + type=click.Path(exists=True, file_okay=False), + # default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment", + required=True, +) +@click.option( + "--model-overrides", + "-mv", + help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. 
See eynollah models list for the full list", + type=(str, str, str), + multiple=True, +) +@click.pass_context +def main(ctx, model_basedir, model_overrides): + # Initialize model zoo + ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides)) main.add_command(models_cli, 'models') @@ -39,23 +55,17 @@ main.add_command(models_cli, 'models') type=click.Path(exists=True, file_okay=False), required=True, ) -@click.option( - "--model", - "-m", - help="directory of models", - type=click.Path(exists=True, file_okay=False), - required=True, -) @click.option( "--log_level", "-l", type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) - -def machine_based_reading_order(input, dir_in, out, model, log_level): +@click.pass_context +def machine_based_reading_order(ctx, input, dir_in, out, log_level): + from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout assert bool(input) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
- orderer = machine_based_reading_order_on_layout(model) + orderer = machine_based_reading_order_on_layout(model_zoo=ctx.obj.model_zoo) if log_level: orderer.logger.setLevel(getLevelName(log_level)) @@ -67,7 +77,6 @@ def machine_based_reading_order(input, dir_in, out, model, log_level): @main.command() @click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') -@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') @click.option( "--input-image", "--image", "-i", @@ -92,7 +101,7 @@ def machine_based_reading_order(input, dir_in, out, model, log_level): '--mode', type=click.Choice(['single', 'multi']), default='single', - help="Whether to use the (faster) single-model binarization or the (slightly better) multi-model binarization" + help="Whether to use the (newer and faster) single-model binarization or the (slightly better) multi-model binarization" ) @click.option( "--log_level", @@ -100,17 +109,19 @@ def machine_based_reading_order(input, dir_in, out, model, log_level): type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) +@click.pass_context def binarization( + ctx, patches, - model_dir, input_image, mode, dir_in, output, log_level, ): + from eynollah.sbb_binarize import SbbBinarizer assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." 
- binarizer = SbbBinarizer(model_dir, mode=mode) + binarizer = SbbBinarizer(model_zoo=ctx.obj.model_zoo, mode=mode) if log_level: binarizer.log.setLevel(getLevelName(log_level)) binarizer.run( @@ -148,14 +159,6 @@ def binarization( help="directory of input images (instead of --image)", type=click.Path(exists=True, file_okay=False), ) -@click.option( - "--model", - "-m", - help="directory of models", - type=click.Path(exists=True, file_okay=False), - required=True, -) - @click.option( "--num_col_upper", "-ncu", @@ -178,12 +181,13 @@ def binarization( type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) - -def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level): +@click.pass_context +def enhancement(ctx, image, out, overwrite, dir_in, num_col_upper, num_col_lower, save_org_scale, log_level): + from eynollah.image_enhancer import Enhancer assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." initLogging() enhancer = Enhancer( - model, + model_zoo=ctx.obj.model_zoo, num_col_upper=num_col_upper, num_col_lower=num_col_lower, save_org_scale=save_org_scale, @@ -223,22 +227,6 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low help="directory of input images (instead of --image)", type=click.Path(exists=True, file_okay=False), ) -@click.option( - "--model", - "-m", - 'model_basedir', - help="directory of models", - type=click.Path(exists=True, file_okay=False), - # default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment", - required=True, -) -@click.option( - "--model_version", - "-mv", - help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. 
See eynollah list-models for the full list", - type=(str, str, str), - multiple=True, -) @click.option( "--save_images", "-si", @@ -409,14 +397,13 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low is_flag=True, help="Setup a basic console logger", ) - +@click.pass_context def layout( + ctx, image, out, overwrite, dir_in, - model_basedir, - model_version, save_images, save_layout, save_deskewed, @@ -447,6 +434,7 @@ def layout( log_level, setup_logging, ): + from eynollah.eynollah import Eynollah if setup_logging: console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) @@ -476,8 +464,7 @@ def layout( assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho" assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." eynollah = Eynollah( - model_basedir, - model_overrides=model_version, + model_zoo=ctx.obj.model_zoo, extract_only_images=extract_only_images, enable_plotting=enable_plotting, allow_enhancement=allow_enhancement, @@ -559,17 +546,6 @@ def layout( help="overwrite (instead of skipping) if output xml exists", is_flag=True, ) -@click.option( - "--model", - "-m", - help="directory of models", - type=click.Path(exists=True, file_okay=False), -) -@click.option( - "--model_name", - help="Specific model file path to use for OCR", - type=click.Path(exists=True, file_okay=False), -) @click.option( "--tr_ocr", "-trocr/-notrocr", @@ -609,20 +585,36 @@ def layout( type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) - -def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): +@click.pass_context +def ocr( + ctx, + image, + dir_in, + dir_in_bin, 
+ dir_xmls, + out, + dir_out_image_text, + overwrite, + tr_ocr, + export_textline_images_and_text, + do_not_mask_with_textline_contour, + batch_size, + dataset_abbrevation, + min_conf_value_of_textline_text, + log_level, +): + from eynollah.eynollah_ocr import Eynollah_ocr initLogging() - - assert bool(model) != bool(model_name), "Either -m (model directory) or --model_name (specific model name) must be provided." + assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr" - assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m" + # FIXME: refactor: move export_textline_images_and_text out of eynollah.py + # assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m" assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs" assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib" assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit" assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both." 
eynollah_ocr = Eynollah_ocr( - dir_models=model, - model_name=model_name, + model_zoo=ctx.obj.model_zoo, tr_ocr=tr_ocr, export_textline_images_and_text=export_textline_images_and_text, do_not_mask_with_textline_contour=do_not_mask_with_textline_contour, diff --git a/src/eynollah/cli_models.py b/src/eynollah/cli_models.py index a299d19..2f6eded 100644 --- a/src/eynollah/cli_models.py +++ b/src/eynollah/cli_models.py @@ -6,30 +6,7 @@ import click from eynollah.model_zoo.default_specs import MODELS_VERSION from .model_zoo import EynollahModelZoo - -@dataclass() -class EynollahCliCtx: - model_zoo: EynollahModelZoo - - @click.group() -@click.pass_context -@click.option( - "--model", - "-m", - 'model_basedir', - help="directory of models", - type=click.Path(exists=True, file_okay=False), - # default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment", - required=True, -) -@click.option( - "--model-overrides", - "-mv", - help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list", - type=(str, str, str), - multiple=True, -) def models_cli( ctx, model_basedir: str, @@ -38,7 +15,7 @@ def models_cli( """ Organize models for the various runners in eynollah. 
""" - ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides)) + assert ctx.obj.model_zoo @models_cli.command('list') diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 98e894c..867d86b 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -138,8 +138,8 @@ num_patches =21*21#14*14#28*28#14*14#28*28 class Eynollah: def __init__( self, - dir_models : str, - model_overrides: List[Tuple[str, str, str]] = [], + *, + model_zoo: EynollahModelZoo, extract_only_images : bool =False, enable_plotting : bool = False, allow_enhancement : bool = False, @@ -164,7 +164,7 @@ class Eynollah: skip_layout_and_reading_order : bool = False, ): self.logger = getLogger('eynollah') - self.model_zoo = EynollahModelZoo(basedir=dir_models) + self.model_zoo = model_zoo self.plotter = None if skip_layout_and_reading_order: @@ -231,12 +231,10 @@ class Eynollah: self.logger.warning("no GPU device available") self.logger.info("Loading models...") - self.setup_models(*model_overrides) + self.setup_models() self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)") - def setup_models(self, *model_overrides: Tuple[str, str, str]): - # override defaults from CLI - self.model_zoo.override_models(*model_overrides) + def setup_models(self): # load models, depending on modes # (note: loading too many models can cause OOM on GPU/CUDA, diff --git a/src/eynollah/eynollah_ocr.py b/src/eynollah/eynollah_ocr.py index 3aafd8e..d32777a 100644 --- a/src/eynollah/eynollah_ocr.py +++ b/src/eynollah/eynollah_ocr.py @@ -51,8 +51,8 @@ except ImportError: class Eynollah_ocr: def __init__( self, - dir_models, - model_name=None, + *, + model_zoo: EynollahModelZoo, dir_xmls=None, tr_ocr=False, batch_size: Optional[int]=None, @@ -70,7 +70,7 @@ class Eynollah_ocr: # prefix or dataset self.pref_of_dataset = pref_of_dataset self.logger = logger if logger else getLogger('eynollah.ocr') - self.model_zoo = 
EynollahModelZoo(basedir=dir_models) + self.model_zoo = model_zoo # TODO: Properly document what 'export_textline_images_and_text' is about if export_textline_images_and_text: diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py index 74b4865..08d3d90 100644 --- a/src/eynollah/image_enhancer.py +++ b/src/eynollah/image_enhancer.py @@ -32,7 +32,8 @@ KERNEL = np.ones((5, 5), np.uint8) class Enhancer: def __init__( self, - dir_models : str, + *, + model_zoo: EynollahModelZoo, num_col_upper : Optional[int] = None, num_col_lower : Optional[int] = None, save_org_scale : bool = False, @@ -51,7 +52,7 @@ class Enhancer: self.num_col_lower = num_col_lower self.logger = logger if logger else getLogger('eynollah.enhance') - self.model_zoo = EynollahModelZoo(basedir=dir_models) + self.model_zoo = model_zoo for v in ['binarization', 'enhancement', 'col_classifier', 'page']: self.model_zoo.load_model(v) diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index 8338d35..620d6c0 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -32,12 +32,12 @@ KERNEL = np.ones((5, 5), np.uint8) class machine_based_reading_order_on_layout: def __init__( self, - dir_models : str, + *, + model_zoo: EynollahModelZoo, logger : Optional[Logger] = None, ): self.logger = logger if logger else getLogger('mbreorder') - self.dir_models = dir_models - self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824" + self.model_zoo = model_zoo try: for device in tf.config.list_physical_devices('GPU'): @@ -45,7 +45,6 @@ class machine_based_reading_order_on_layout: except: self.logger.warning("no GPU device available") - self.model_zoo = EynollahModelZoo(basedir=dir_models) self.model_zoo.load_model('reading_order') # FIXME: light_version is always true, no need for checks in the code self.light_version = True diff --git a/src/eynollah/model_zoo/__init__.py b/src/eynollah/model_zoo/__init__.py index 
dda52c2..e1dc985 100644 --- a/src/eynollah/model_zoo/__init__.py +++ b/src/eynollah/model_zoo/__init__.py @@ -1,7 +1,4 @@ __all__ = [ 'EynollahModelZoo', - 'KerasModel', - 'TrOCRProcessor', - 'VisionEncoderDecoderModel', ] -from .model_zoo import EynollahModelZoo, KerasModel, TrOCRProcessor, VisionEncoderDecoderModel +from .model_zoo import EynollahModelZoo diff --git a/src/eynollah/model_zoo/model_zoo.py b/src/eynollah/model_zoo/model_zoo.py index 32fdd0e..40e979f 100644 --- a/src/eynollah/model_zoo/model_zoo.py +++ b/src/eynollah/model_zoo/model_zoo.py @@ -4,11 +4,13 @@ from copy import deepcopy from pathlib import Path from typing import Dict, List, Optional, Tuple, Type, Union +from ocrd_utils import tf_disable_interactive_logs +tf_disable_interactive_logs() + from keras.layers import StringLookup from keras.models import Model as KerasModel from keras.models import load_model from tabulate import tabulate -from transformers import TrOCRProcessor, VisionEncoderDecoderModel from ..patch_encoder import PatchEncoder, Patches from .specs import EynollahModelSpecSet from .default_specs import DEFAULT_MODEL_SPECS @@ -102,6 +104,7 @@ class EynollahModelZoo: elif model_category == 'characters': model = self._load_characters() elif model_category == 'trocr_processor': + from transformers import TrOCRProcessor model = TrOCRProcessor.from_pretrained(model_path) else: try: @@ -128,7 +131,10 @@ class EynollahModelZoo: """ ocr_model_dir = self.model_path('ocr', variant) if variant == 'tr': - return VisionEncoderDecoderModel.from_pretrained(ocr_model_dir) + from transformers import VisionEncoderDecoderModel + ret = VisionEncoderDecoderModel.from_pretrained(ocr_model_dir) + assert isinstance(ret, VisionEncoderDecoderModel) + return ret else: ocr_model = load_model(ocr_model_dir, compile=False) assert isinstance(ocr_model, KerasModel) diff --git a/src/eynollah/model_zoo/types.py b/src/eynollah/model_zoo/types.py index 5c3685e..7141d39 100644 --- 
a/src/eynollah/model_zoo/types.py +++ b/src/eynollah/model_zoo/types.py @@ -1,6 +1,8 @@ from typing import List, TypeVar, Union from keras.models import Model as KerasModel -from transformers import TrOCRProcessor, VisionEncoderDecoderModel -AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List] +# NOTE: Creating an actual union type requires loading transformers which is expensive and error-prone +# from transformers import TrOCRProcessor, VisionEncoderDecoderModel +# AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List] +AnyModel = object T = TypeVar('T') diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py index 1bcf9d9..a8a05fa 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/sbb_binarize.py @@ -24,11 +24,11 @@ def resize_image(img_in, input_height, input_width): class SbbBinarizer: - def __init__(self, model_dir: str, mode: str, logger=None): + def __init__(self, *, model_zoo: EynollahModelZoo, mode: str, logger=None): if mode not in ('single', 'multi'): raise ValueError(f"'mode' must be either 'multi' or 'single', not {mode}") self.log = logger if logger else logging.getLogger('eynollah.binarization') - self.model_zoo = EynollahModelZoo(basedir=model_dir) + self.model_zoo = model_zoo self.models = self.setup_models(mode) self.session = self.start_new_session() diff --git a/tests/cli_tests/conftest.py b/tests/cli_tests/conftest.py index c54f47b..223cc85 100644 --- a/tests/cli_tests/conftest.py +++ b/tests/cli_tests/conftest.py @@ -1,15 +1,17 @@ from typing import List -from click import Command import pytest import logging from click.testing import CliRunner, Result +from eynollah.cli import main as eynollah_cli + @pytest.fixture def run_eynollah_ok_and_check_logs( pytestconfig, caplog, model_dir, + eynollah_subcommands, eynollah_log_filter, ): """ @@ -18,14 +20,23 @@ def run_eynollah_ok_and_check_logs( every fragment in `expected_logs` """ - def _run_click_ok_logs(cli: Command, 
args: List[str], expected_logs: List[str]) -> Result: - args = ['-m', model_dir] + args + def _run_click_ok_logs( + subcommand: 'str', + args: List[str], + expected_logs: List[str], + ) -> Result: + assert subcommand in eynollah_subcommands, f'subcommand {subcommand} must be one of {eynollah_subcommands}' + args = [ + '-m', model_dir, + subcommand, + *args + ] if pytestconfig.getoption('verbose') > 0: args.extend(['-l', 'DEBUG']) caplog.set_level(logging.INFO) runner = CliRunner() with caplog.filtering(eynollah_log_filter): - result = runner.invoke(cli, args, catch_exceptions=False) + result = runner.invoke(eynollah_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout if expected_logs: logmsgs = [logrec.message for logrec in caplog.records] diff --git a/tests/cli_tests/test_binarization.py b/tests/cli_tests/test_binarization.py index 4672a4f..0490805 100644 --- a/tests/cli_tests/test_binarization.py +++ b/tests/cli_tests/test_binarization.py @@ -1,10 +1,5 @@ import pytest from PIL import Image -from eynollah.cli import ( - binarization as binarization_cli, -) -from ocrd_modelfactory import page_from_file -from ocrd_models.constants import NAMESPACES as NS @pytest.mark.parametrize( "options", @@ -21,7 +16,7 @@ def test_run_eynollah_binarization_filename( infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') run_eynollah_ok_and_check_logs( - binarization_cli, + 'binarization', [ '-i', str(infile), '-o', str(outfile), @@ -45,7 +40,7 @@ def test_run_eynollah_binarization_directory( ): outdir = tmp_path run_eynollah_ok_and_check_logs( - binarization_cli, + 'binarization', [ '-di', str(resources_dir), '-o', str(outdir), diff --git a/tests/cli_tests/test_enhance.py b/tests/cli_tests/test_enhance.py index 590c07f..91e7c4b 100644 --- a/tests/cli_tests/test_enhance.py +++ b/tests/cli_tests/test_enhance.py @@ -1,10 +1,5 @@ import pytest from PIL import Image -from 
eynollah.cli import ( - enhancement as enhancement_cli, -) -from ocrd_modelfactory import page_from_file -from ocrd_models.constants import NAMESPACES as NS @pytest.mark.parametrize( "options", @@ -21,7 +16,7 @@ def test_run_eynollah_enhancement_filename( infile = resources_dir / 'kant_aufklaerung_1784_0020.tif' outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') run_eynollah_ok_and_check_logs( - enhancement_cli, + 'enhancement', [ '-i', str(infile), '-o', str(outfile.parent), @@ -44,7 +39,7 @@ def test_run_eynollah_enhancement_directory( ): outdir = tmp_path run_eynollah_ok_and_check_logs( - enhancement_cli, + 'enhancement', [ '-di', str(resources_dir), '-o', str(outdir), diff --git a/tests/cli_tests/test_layout.py b/tests/cli_tests/test_layout.py index db7b88c..776372c 100644 --- a/tests/cli_tests/test_layout.py +++ b/tests/cli_tests/test_layout.py @@ -1,7 +1,4 @@ import pytest -from eynollah.cli import ( - layout as layout_cli, -) from ocrd_modelfactory import page_from_file from ocrd_models.constants import NAMESPACES as NS @@ -30,11 +27,10 @@ def test_run_eynollah_layout_filename( resources_dir, options, ): - outdir = tmp_path infile = resources_dir / 'kant_aufklaerung_1784_0020.tif' outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' run_eynollah_ok_and_check_logs( - layout_cli, + 'layout', [ '-i', str(infile), '-o', str(outfile.parent), @@ -68,7 +64,7 @@ def test_run_eynollah_layout_filename2( infile = resources_dir / 'euler_rechenkunst01_1738_0025.tif' outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml' run_eynollah_ok_and_check_logs( - layout_cli, + 'layout', [ '-i', str(infile), '-o', str(outfile.parent), @@ -96,7 +92,7 @@ def test_run_eynollah_layout_directory( ): outdir = tmp_path run_eynollah_ok_and_check_logs( - layout_cli, + 'layout', [ '-di', str(resources_dir), '-o', str(outdir), diff --git a/tests/cli_tests/test_mbreorder.py b/tests/cli_tests/test_mbreorder.py index 7fb246d..25b44d8 100644 --- a/tests/cli_tests/test_mbreorder.py 
+++ b/tests/cli_tests/test_mbreorder.py @@ -1,11 +1,6 @@ from ocrd_modelfactory import page_from_file from ocrd_models.constants import NAMESPACES as NS -from eynollah.cli import ( - machine_based_reading_order as mbreorder_cli, -) - - def test_run_eynollah_mbreorder_filename( tmp_path, resources_dir, @@ -14,7 +9,7 @@ def test_run_eynollah_mbreorder_filename( infile = resources_dir / 'kant_aufklaerung_1784_0020.xml' outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') run_eynollah_ok_and_check_logs( - mbreorder_cli, + 'machine-based-reading-order', [ '-i', str(infile), '-o', str(outfile.parent), @@ -37,10 +32,9 @@ def test_run_eynollah_mbreorder_directory( resources_dir, run_eynollah_ok_and_check_logs, ): - outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') outdir = tmp_path run_eynollah_ok_and_check_logs( - mbreorder_cli, + 'machine-based-reading-order', [ '-di', str(resources_dir), '-o', str(outdir), diff --git a/tests/cli_tests/test_ocr.py b/tests/cli_tests/test_ocr.py index 747d978..f6b33a7 100644 --- a/tests/cli_tests/test_ocr.py +++ b/tests/cli_tests/test_ocr.py @@ -1,7 +1,4 @@ import pytest -from eynollah.cli import ( - ocr as ocr_cli, -) from ocrd_modelfactory import page_from_file from ocrd_models.constants import NAMESPACES as NS @@ -26,7 +23,7 @@ def test_run_eynollah_ocr_filename( if "-doit" in options: options.insert(options.index("-doit") + 1, str(outrenderfile.parent)) run_eynollah_ok_and_check_logs( - ocr_cli, + 'ocr', [ '-i', str(infile), '-dx', str(infile.parent), @@ -53,7 +50,7 @@ def test_run_eynollah_ocr_directory( ): outdir = tmp_path run_eynollah_ok_and_check_logs( - ocr_cli, + 'ocr', [ '-di', str(resources_dir), '-dx', str(resources_dir), diff --git a/tests/cli_tests/test_run.py b/tests/cli_tests/test_run.py deleted file mode 100644 index 122bab5..0000000 --- a/tests/cli_tests/test_run.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest -from PIL import Image -from eynollah.cli import ( - layout as layout_cli, - binarization 
as binarization_cli, - enhancement as enhancement_cli, -) -from ocrd_modelfactory import page_from_file -from ocrd_models.constants import NAMESPACES as NS - diff --git a/tests/conftest.py b/tests/conftest.py index e73d0e3..9b70ae6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,3 +23,15 @@ def image_resources(resources_dir): @pytest.fixture() def eynollah_log_filter(): return lambda logrec: logrec.name.startswith('eynollah') + +@pytest.fixture +def eynollah_subcommands(): + return [ + 'binarization', + 'layout', + 'ocr', + 'enhancement', + 'machine-based-reading-order', + 'models', + ] +