make --model-basedir and --model-overrides top-level CLI options

This commit is contained in:
kba 2025-10-29 18:24:17 +01:00
parent b6f82c72b9
commit a913bdf7dc
18 changed files with 132 additions and 170 deletions

View file

@ -2,20 +2,36 @@ from dataclasses import dataclass
import sys
import click
import logging
from typing import Tuple, List
from ocrd_utils import initLogging, getLevelName, getLogger
from eynollah.eynollah import Eynollah
from eynollah.eynollah_ocr import Eynollah_ocr
from eynollah.sbb_binarize import SbbBinarizer
from eynollah.image_enhancer import Enhancer
from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
from eynollah.model_zoo import EynollahModelZoo
from .cli_models import models_cli
@dataclass()
class EynollahCliCtx:
    """State object stored on the click context (``ctx.obj``) by the CLI group callback."""
    # Model zoo instance that the subcommands read back via ``ctx.obj.model_zoo``
    model_zoo: EynollahModelZoo
@click.group()
def main():
pass
@click.option(
    "--model-basedir",
    "-m",
    help="directory of models",
    type=click.Path(exists=True, file_okay=False),
    # default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
    required=True,
)
@click.option(
    "--model-overrides",
    "-mv",
    help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
    # Each occurrence is parsed as a 3-tuple of strings; the option may be repeated
    type=(str, str, str),
    multiple=True,
)
@click.pass_context
def main(ctx, model_basedir, model_overrides):
    # Top-level callback: builds a single EynollahModelZoo from the two options
    # above and stores it on the click context, where the subcommands pick it up
    # as ``ctx.obj.model_zoo``.
    # NOTE: documented with comments on purpose -- click would display a
    # docstring here as the command's --help text.
    # Initialize model zoo
    ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides))
main.add_command(models_cli, 'models')
@ -39,23 +55,17 @@ main.add_command(models_cli, 'models')
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option(
"--model",
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option(
"--log_level",
"-l",
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this",
)
def machine_based_reading_order(input, dir_in, out, model, log_level):
@click.pass_context
def machine_based_reading_order(ctx, input, dir_in, out, log_level):
from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
assert bool(input) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
orderer = machine_based_reading_order_on_layout(model)
orderer = machine_based_reading_order_on_layout(model_zoo=ctx.obj.model_zoo)
if log_level:
orderer.logger.setLevel(getLevelName(log_level))
@ -67,7 +77,6 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
@main.command()
@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.')
@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction')
@click.option(
"--input-image", "--image",
"-i",
@ -92,7 +101,7 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
'--mode',
type=click.Choice(['single', 'multi']),
default='single',
help="Whether to use the (faster) single-model binarization or the (slightly better) multi-model binarization"
help="Whether to use the (newer and faster) single-model binarization or the (slightly better) multi-model binarization"
)
@click.option(
"--log_level",
@ -100,17 +109,19 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this",
)
@click.pass_context
def binarization(
ctx,
patches,
model_dir,
input_image,
mode,
dir_in,
output,
log_level,
):
from eynollah.sbb_binarize import SbbBinarizer
assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
binarizer = SbbBinarizer(model_dir, mode=mode)
binarizer = SbbBinarizer(model_zoo=ctx.obj.model_zoo, mode=mode)
if log_level:
binarizer.log.setLevel(getLevelName(log_level))
binarizer.run(
@ -148,14 +159,6 @@ def binarization(
help="directory of input images (instead of --image)",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--model",
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option(
"--num_col_upper",
"-ncu",
@ -178,12 +181,13 @@ def binarization(
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this",
)
def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level):
@click.pass_context
def enhancement(ctx, image, out, overwrite, dir_in, num_col_upper, num_col_lower, save_org_scale, log_level):
from eynollah.image_enhancer import Enhancer
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
initLogging()
enhancer = Enhancer(
model,
model_zoo=ctx.obj.model_zoo,
num_col_upper=num_col_upper,
num_col_lower=num_col_lower,
save_org_scale=save_org_scale,
@ -223,22 +227,6 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
help="directory of input images (instead of --image)",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--model",
"-m",
'model_basedir',
help="directory of models",
type=click.Path(exists=True, file_okay=False),
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
required=True,
)
@click.option(
"--model_version",
"-mv",
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
type=(str, str, str),
multiple=True,
)
@click.option(
"--save_images",
"-si",
@ -409,14 +397,13 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
is_flag=True,
help="Setup a basic console logger",
)
@click.pass_context
def layout(
ctx,
image,
out,
overwrite,
dir_in,
model_basedir,
model_version,
save_images,
save_layout,
save_deskewed,
@ -447,6 +434,7 @@ def layout(
log_level,
setup_logging,
):
from eynollah.eynollah import Eynollah
if setup_logging:
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)
@ -476,8 +464,7 @@ def layout(
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
eynollah = Eynollah(
model_basedir,
model_overrides=model_version,
model_zoo=ctx.obj.model_zoo,
extract_only_images=extract_only_images,
enable_plotting=enable_plotting,
allow_enhancement=allow_enhancement,
@ -559,17 +546,6 @@ def layout(
help="overwrite (instead of skipping) if output xml exists",
is_flag=True,
)
@click.option(
"--model",
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--model_name",
help="Specific model file path to use for OCR",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--tr_ocr",
"-trocr/-notrocr",
@ -609,20 +585,36 @@ def layout(
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this",
)
def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
@click.pass_context
def ocr(
ctx,
image,
dir_in,
dir_in_bin,
dir_xmls,
out,
dir_out_image_text,
overwrite,
tr_ocr,
export_textline_images_and_text,
do_not_mask_with_textline_contour,
batch_size,
dataset_abbrevation,
min_conf_value_of_textline_text,
log_level,
):
from eynollah.eynollah_ocr import Eynollah_ocr
initLogging()
assert bool(model) != bool(model_name), "Either -m (model directory) or --model_name (specific model name) must be provided."
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
# FIXME: refactor: move export_textline_images_and_text out of eynollah.py
# assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both."
eynollah_ocr = Eynollah_ocr(
dir_models=model,
model_name=model_name,
model_zoo=ctx.obj.model_zoo,
tr_ocr=tr_ocr,
export_textline_images_and_text=export_textline_images_and_text,
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,

View file

@ -6,30 +6,7 @@ import click
from eynollah.model_zoo.default_specs import MODELS_VERSION
from .model_zoo import EynollahModelZoo
@dataclass()
class EynollahCliCtx:
model_zoo: EynollahModelZoo
@click.group()
@click.pass_context
@click.option(
"--model",
"-m",
'model_basedir',
help="directory of models",
type=click.Path(exists=True, file_okay=False),
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
required=True,
)
@click.option(
"--model-overrides",
"-mv",
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
type=(str, str, str),
multiple=True,
)
def models_cli(
ctx,
model_basedir: str,
@ -38,7 +15,7 @@ def models_cli(
"""
Organize models for the various runners in eynollah.
"""
ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides))
assert ctx.obj.model_zoo
@models_cli.command('list')

View file

@ -138,8 +138,8 @@ num_patches =21*21#14*14#28*28#14*14#28*28
class Eynollah:
def __init__(
self,
dir_models : str,
model_overrides: List[Tuple[str, str, str]] = [],
*,
model_zoo: EynollahModelZoo,
extract_only_images : bool =False,
enable_plotting : bool = False,
allow_enhancement : bool = False,
@ -164,7 +164,7 @@ class Eynollah:
skip_layout_and_reading_order : bool = False,
):
self.logger = getLogger('eynollah')
self.model_zoo = EynollahModelZoo(basedir=dir_models)
self.model_zoo = model_zoo
self.plotter = None
if skip_layout_and_reading_order:
@ -231,12 +231,10 @@ class Eynollah:
self.logger.warning("no GPU device available")
self.logger.info("Loading models...")
self.setup_models(*model_overrides)
self.setup_models()
self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")
def setup_models(self, *model_overrides: Tuple[str, str, str]):
# override defaults from CLI
self.model_zoo.override_models(*model_overrides)
def setup_models(self):
# load models, depending on modes
# (note: loading too many models can cause OOM on GPU/CUDA,

View file

@ -51,8 +51,8 @@ except ImportError:
class Eynollah_ocr:
def __init__(
self,
dir_models,
model_name=None,
*,
model_zoo: EynollahModelZoo,
dir_xmls=None,
tr_ocr=False,
batch_size: Optional[int]=None,
@ -70,7 +70,7 @@ class Eynollah_ocr:
# prefix or dataset
self.pref_of_dataset = pref_of_dataset
self.logger = logger if logger else getLogger('eynollah.ocr')
self.model_zoo = EynollahModelZoo(basedir=dir_models)
self.model_zoo = model_zoo
# TODO: Properly document what 'export_textline_images_and_text' is about
if export_textline_images_and_text:

View file

@ -32,7 +32,8 @@ KERNEL = np.ones((5, 5), np.uint8)
class Enhancer:
def __init__(
self,
dir_models : str,
*,
model_zoo: EynollahModelZoo,
num_col_upper : Optional[int] = None,
num_col_lower : Optional[int] = None,
save_org_scale : bool = False,
@ -51,7 +52,7 @@ class Enhancer:
self.num_col_lower = num_col_lower
self.logger = logger if logger else getLogger('eynollah.enhance')
self.model_zoo = EynollahModelZoo(basedir=dir_models)
self.model_zoo = model_zoo
for v in ['binarization', 'enhancement', 'col_classifier', 'page']:
self.model_zoo.load_model(v)

View file

@ -32,12 +32,12 @@ KERNEL = np.ones((5, 5), np.uint8)
class machine_based_reading_order_on_layout:
def __init__(
self,
dir_models : str,
*,
model_zoo: EynollahModelZoo,
logger : Optional[Logger] = None,
):
self.logger = logger if logger else getLogger('mbreorder')
self.dir_models = dir_models
self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"
self.model_zoo = model_zoo
try:
for device in tf.config.list_physical_devices('GPU'):
@ -45,7 +45,6 @@ class machine_based_reading_order_on_layout:
except:
self.logger.warning("no GPU device available")
self.model_zoo = EynollahModelZoo(basedir=dir_models)
self.model_zoo.load_model('reading_order')
# FIXME: light_version is always true, no need for checks in the code
self.light_version = True

View file

@ -1,7 +1,4 @@
__all__ = [
'EynollahModelZoo',
'KerasModel',
'TrOCRProcessor',
'VisionEncoderDecoderModel',
]
from .model_zoo import EynollahModelZoo, KerasModel, TrOCRProcessor, VisionEncoderDecoderModel
from .model_zoo import EynollahModelZoo

View file

@ -4,11 +4,13 @@ from copy import deepcopy
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type, Union
from ocrd_utils import tf_disable_interactive_logs
tf_disable_interactive_logs()
from keras.layers import StringLookup
from keras.models import Model as KerasModel
from keras.models import load_model
from tabulate import tabulate
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ..patch_encoder import PatchEncoder, Patches
from .specs import EynollahModelSpecSet
from .default_specs import DEFAULT_MODEL_SPECS
@ -102,6 +104,7 @@ class EynollahModelZoo:
elif model_category == 'characters':
model = self._load_characters()
elif model_category == 'trocr_processor':
from transformers import TrOCRProcessor
model = TrOCRProcessor.from_pretrained(model_path)
else:
try:
@ -128,7 +131,10 @@ class EynollahModelZoo:
"""
ocr_model_dir = self.model_path('ocr', variant)
if variant == 'tr':
return VisionEncoderDecoderModel.from_pretrained(ocr_model_dir)
from transformers import VisionEncoderDecoderModel
ret = VisionEncoderDecoderModel.from_pretrained(ocr_model_dir)
assert isinstance(ret, VisionEncoderDecoderModel)
return ret
else:
ocr_model = load_model(ocr_model_dir, compile=False)
assert isinstance(ocr_model, KerasModel)

View file

@ -1,6 +1,8 @@
from typing import List, TypeVar, Union
from keras.models import Model as KerasModel
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List]
# NOTE: Creating an actual union type requires loading transformers which is expensive and error-prone
# from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List]
AnyModel = object
T = TypeVar('T')

View file

@ -24,11 +24,11 @@ def resize_image(img_in, input_height, input_width):
class SbbBinarizer:
def __init__(self, model_dir: str, mode: str, logger=None):
def __init__(self, *, model_zoo: EynollahModelZoo, mode: str, logger=None):
if mode not in ('single', 'multi'):
raise ValueError(f"'mode' must be either 'multi' or 'single', not {mode}")
self.log = logger if logger else logging.getLogger('eynollah.binarization')
self.model_zoo = EynollahModelZoo(basedir=model_dir)
self.model_zoo = model_zoo
self.models = self.setup_models(mode)
self.session = self.start_new_session()

View file

@ -1,15 +1,17 @@
from typing import List
from click import Command
import pytest
import logging
from click.testing import CliRunner, Result
from eynollah.cli import main as eynollah_cli
@pytest.fixture
def run_eynollah_ok_and_check_logs(
pytestconfig,
caplog,
model_dir,
eynollah_subcommands,
eynollah_log_filter,
):
"""
@ -18,14 +20,23 @@ def run_eynollah_ok_and_check_logs(
every fragment in `expected_logs`
"""
def _run_click_ok_logs(cli: Command, args: List[str], expected_logs: List[str]) -> Result:
args = ['-m', model_dir] + args
def _run_click_ok_logs(
subcommand: 'str',
args: List[str],
expected_logs: List[str],
) -> Result:
assert subcommand in eynollah_subcommands, f'subcommand {subcommand} must be one of {eynollah_subcommands}'
args = [
'-m', model_dir,
subcommand,
*args
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO)
runner = CliRunner()
with caplog.filtering(eynollah_log_filter):
result = runner.invoke(cli, args, catch_exceptions=False)
result = runner.invoke(eynollah_cli, args, catch_exceptions=False)
assert result.exit_code == 0, result.stdout
if expected_logs:
logmsgs = [logrec.message for logrec in caplog.records]

View file

@ -1,10 +1,5 @@
import pytest
from PIL import Image
from eynollah.cli import (
binarization as binarization_cli,
)
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS
@pytest.mark.parametrize(
"options",
@ -21,7 +16,7 @@ def test_run_eynollah_binarization_filename(
infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
run_eynollah_ok_and_check_logs(
binarization_cli,
'binarization',
[
'-i', str(infile),
'-o', str(outfile),
@ -45,7 +40,7 @@ def test_run_eynollah_binarization_directory(
):
outdir = tmp_path
run_eynollah_ok_and_check_logs(
binarization_cli,
'binarization',
[
'-di', str(resources_dir),
'-o', str(outdir),

View file

@ -1,10 +1,5 @@
import pytest
from PIL import Image
from eynollah.cli import (
enhancement as enhancement_cli,
)
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS
@pytest.mark.parametrize(
"options",
@ -21,7 +16,7 @@ def test_run_eynollah_enhancement_filename(
infile = resources_dir / 'kant_aufklaerung_1784_0020.tif'
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
run_eynollah_ok_and_check_logs(
enhancement_cli,
'enhancement',
[
'-i', str(infile),
'-o', str(outfile.parent),
@ -44,7 +39,7 @@ def test_run_eynollah_enhancement_directory(
):
outdir = tmp_path
run_eynollah_ok_and_check_logs(
enhancement_cli,
'enhancement',
[
'-di', str(resources_dir),
'-o', str(outdir),

View file

@ -1,7 +1,4 @@
import pytest
from eynollah.cli import (
layout as layout_cli,
)
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS
@ -30,11 +27,10 @@ def test_run_eynollah_layout_filename(
resources_dir,
options,
):
outdir = tmp_path
infile = resources_dir / 'kant_aufklaerung_1784_0020.tif'
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
run_eynollah_ok_and_check_logs(
layout_cli,
'layout',
[
'-i', str(infile),
'-o', str(outfile.parent),
@ -68,7 +64,7 @@ def test_run_eynollah_layout_filename2(
infile = resources_dir / 'euler_rechenkunst01_1738_0025.tif'
outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml'
run_eynollah_ok_and_check_logs(
layout_cli,
'layout',
[
'-i', str(infile),
'-o', str(outfile.parent),
@ -96,7 +92,7 @@ def test_run_eynollah_layout_directory(
):
outdir = tmp_path
run_eynollah_ok_and_check_logs(
layout_cli,
'layout',
[
'-di', str(resources_dir),
'-o', str(outdir),

View file

@ -1,11 +1,6 @@
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS
from eynollah.cli import (
machine_based_reading_order as mbreorder_cli,
)
def test_run_eynollah_mbreorder_filename(
tmp_path,
resources_dir,
@ -14,7 +9,7 @@ def test_run_eynollah_mbreorder_filename(
infile = resources_dir / 'kant_aufklaerung_1784_0020.xml'
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
run_eynollah_ok_and_check_logs(
mbreorder_cli,
'machine-based-reading-order',
[
'-i', str(infile),
'-o', str(outfile.parent),
@ -37,10 +32,9 @@ def test_run_eynollah_mbreorder_directory(
resources_dir,
run_eynollah_ok_and_check_logs,
):
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
outdir = tmp_path
run_eynollah_ok_and_check_logs(
mbreorder_cli,
'machine-based-reading-order',
[
'-di', str(resources_dir),
'-o', str(outdir),

View file

@ -1,7 +1,4 @@
import pytest
from eynollah.cli import (
ocr as ocr_cli,
)
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS
@ -26,7 +23,7 @@ def test_run_eynollah_ocr_filename(
if "-doit" in options:
options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
run_eynollah_ok_and_check_logs(
ocr_cli,
'ocr',
[
'-i', str(infile),
'-dx', str(infile.parent),
@ -53,7 +50,7 @@ def test_run_eynollah_ocr_directory(
):
outdir = tmp_path
run_eynollah_ok_and_check_logs(
ocr_cli,
'ocr',
[
'-di', str(resources_dir),
'-dx', str(resources_dir),

View file

@ -1,10 +0,0 @@
import pytest
from PIL import Image
from eynollah.cli import (
layout as layout_cli,
binarization as binarization_cli,
enhancement as enhancement_cli,
)
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS

View file

@ -23,3 +23,15 @@ def image_resources(resources_dir):
@pytest.fixture()
def eynollah_log_filter():
    """Provide a predicate that accepts only log records whose logger name starts with 'eynollah'."""
    def _is_eynollah_record(logrec):
        # True for records emitted by loggers named 'eynollah...'
        return logrec.name.startswith('eynollah')

    return _is_eynollah_record
@pytest.fixture
def eynollah_subcommands():
    """Return the subcommand names that the CLI test helper accepts.

    The test runner fixture asserts that the requested subcommand is a member
    of this list before invoking the ``eynollah`` CLI with it.
    """
    # Every entry needs its own trailing comma: adjacent string literals are
    # concatenated implicitly, which previously fused the last two names into
    # 'machine-based-reading-ordermodels' and made both unavailable.
    return [
        'binarization',
        'layout',
        'ocr',
        'enhancement',
        'machine-based-reading-order',
        'models',
    ]