diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index c9bad52..4a0704f 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -1,15 +1,34 @@ -import sys -import click +from dataclasses import dataclass import logging -from ocrd_utils import initLogging, getLevelName, getLogger -from eynollah.eynollah import Eynollah, Eynollah_ocr -from eynollah.sbb_binarize import SbbBinarizer -from eynollah.image_enhancer import Enhancer -from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout +import sys +from typing import Union + +import click + + +@dataclass +class EynollahCliContext(): + log_level : Union[str, None] = 'INFO' @click.group() -def main(): - pass +@click.option( + "--log_level", + "-l", + type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), + help="Override log level globally to this", +) +@click.pass_context +def main(ctx, log_level): + """ + eynollah - Document Layout Analysis, Image Enhancement, OCR + """ + ctx.obj = EynollahCliContext(log_level=log_level) + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.NOTSET) + formatter = logging.Formatter('%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s', datefmt='%H:%M:%S') + console_handler.setFormatter(formatter) + logging.getLogger('eynollah').addHandler(console_handler) + logging.getLogger('eynollah').setLevel(ctx.obj.log_level or logging.INFO) @main.command() @click.option( @@ -38,18 +57,13 @@ def main(): type=click.Path(exists=True, file_okay=False), required=True, ) -@click.option( - "--log_level", - "-l", - type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), - help="Override log level globally to this", -) - -def machine_based_reading_order(input, dir_in, out, model, log_level): +def machine_based_reading_order(input, dir_in, out, model): + """ + Generate ReadingOrder with a ML model + """ + from .mb_ro_on_layout import machine_based_reading_order_on_layout assert bool(input) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." orderer = machine_based_reading_order_on_layout(model) - if log_level: - orderer.logger.setLevel(getLevelName(log_level)) orderer.run(xml_filename=input, dir_in=dir_in, @@ -79,17 +93,13 @@ def machine_based_reading_order(input, dir_in, out, model, log_level): type=click.Path(file_okay=True, dir_okay=True), required=True, ) -@click.option( - "--log_level", - "-l", - type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), - help="Override log level globally to this", -) -def binarization(patches, model_dir, input_image, dir_in, output, log_level): +def binarization(patches, model_dir, input_image, dir_in, output): + """ + Binarize images with a ML model + """ assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." + from .sbb_binarize import SbbBinarizer binarizer = SbbBinarizer(model_dir) - if log_level: - binarizer.log.setLevel(getLevelName(log_level)) binarizer.run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in) @@ -144,24 +154,18 @@ def binarization(patches, model_dir, input_image, dir_in, output, log_level): is_flag=True, help="if this parameter set to true, this tool will save the enhanced image in org scale.", ) -@click.option( - "--log_level", - "-l", - type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), - help="Override log level globally to this", -) - -def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level): +def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale): + """ + Enhance image + """ assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." - initLogging() + from .image_enhancer import Enhancer enhancer = Enhancer( model, num_col_upper=num_col_upper, num_col_lower=num_col_lower, save_org_scale=save_org_scale, ) - if log_level: - enhancer.logger.setLevel(getLevelName(log_level)) enhancer.run(overwrite=overwrite, dir_in=dir_in, image_filename=image, @@ -366,30 +370,10 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low is_flag=True, help="if this parameter set to true, this tool will ignore layout detection and reading order. It means that textline detection will be done within printspace and contours of textline will be written in xml output file.", ) -# TODO move to top-level CLI context -@click.option( - "--log_level", - "-l", - type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), - help="Override 'eynollah' log level globally to this", -) -# -@click.option( - "--setup-logging", - is_flag=True, - help="Setup a basic console logger", -) - -def layout(image, out, overwrite, dir_in, model, model_version, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging): - if setup_logging: - console_handler = logging.StreamHandler(sys.stdout) - console_handler.setLevel(logging.INFO) - formatter = logging.Formatter('%(message)s') - console_handler.setFormatter(formatter) - getLogger('eynollah').addHandler(console_handler) - getLogger('eynollah').setLevel(logging.INFO) - else: - initLogging() +def layout(image, out, overwrite, dir_in, model, model_version, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction): + """ + Detect Layout (with optional image enhancement and reading order detection) + """ assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep" assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep" assert enable_plotting or not save_all, "Plotting with -sa also requires -ep" @@ -409,6 +393,7 @@ def layout(image, out, overwrite, dir_in, model, model_version, save_images, sav assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l" assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho" assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." + from .eynollah import Eynollah eynollah = Eynollah( model, model_versions=model_version, @@ -435,8 +420,6 @@ def layout(image, out, overwrite, dir_in, model, model_version, save_images, sav threshold_art_class_textline=threshold_art_class_textline, threshold_art_class_layout=threshold_art_class_layout, ) - if log_level: - eynollah.logger.setLevel(getLevelName(log_level)) eynollah.run(overwrite=overwrite, image_filename=image, dir_in=dir_in, @@ -537,16 +520,11 @@ def layout(image, out, overwrite, dir_in, model, model_version, save_images, sav "-min_conf", help="minimum OCR confidence value. Text lines with a confidence value lower than this threshold will not be included in the output XML file.", ) -@click.option( - "--log_level", - "-l", - type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), - help="Override log level globally to this", -) -def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): - initLogging() - +def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text): + """ + Recognize text with a CNN/RNN or transformer ML model. + """ assert bool(model) != bool(model_name), "Either -m (model directory) or --model_name (specific model name) must be provided." assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr" assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m" @@ -554,6 +532,7 @@ def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib" assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit" assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both." + from .eynollah import Eynollah_ocr eynollah_ocr = Eynollah_ocr( dir_models=model, model_name=model_name, @@ -562,10 +541,7 @@ def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, do_not_mask_with_textline_contour=do_not_mask_with_textline_contour, batch_size=batch_size, pref_of_dataset=dataset_abbrevation, - min_conf_value_of_textline_text=min_conf_value_of_textline_text, - ) - if log_level: - eynollah_ocr.logger.setLevel(getLevelName(log_level)) + min_conf_value_of_textline_text=min_conf_value_of_textline_text) eynollah_ocr.run(overwrite=overwrite, dir_in=dir_in, dir_in_bin=dir_in_bin, diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 13acba6..03ee7ce 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -7,6 +7,7 @@ document layout analysis (segmentation) with output in PAGE-XML """ # cannot use importlib.resources until we move to 3.9+ forimportlib.resources.files +import logging import sys if sys.version_info < (3, 10): import importlib_resources @@ -19,8 +20,7 @@ import math import os import sys import time -from typing import Dict, List, Optional, Tuple -import atexit +from typing import List, Optional, Tuple import warnings from functools import partial from pathlib import Path @@ -39,7 +39,7 @@ from scipy.ndimage import gaussian_filter1d from numba import cuda from skimage.morphology import skeletonize from ocrd import OcrdPage -from ocrd_utils import getLogger, tf_disable_interactive_logs +from ocrd_utils import tf_disable_interactive_logs import statistics try: @@ -60,8 +60,6 @@ tf_disable_interactive_logs() import tensorflow as tf from tensorflow.python.keras import backend as K from tensorflow.keras.models import load_model -tf.get_logger().setLevel("ERROR") -warnings.filterwarnings("ignore") # use tf1 compatibility for keras backend from tensorflow.compat.v1.keras.backend import set_session from tensorflow.keras import layers @@ -230,8 +228,9 @@ class Eynollah: threshold_art_class_layout: Optional[float] = None, threshold_art_class_textline: Optional[float] = None, skip_layout_and_reading_order : bool = False, + logger : Optional[logging.Logger] = None, ): - self.logger = getLogger('eynollah') + self.logger = logger or logging.getLogger('eynollah') self.plotter = None if skip_layout_and_reading_order: @@ -4888,14 +4887,13 @@ class Eynollah_ocr: do_not_mask_with_textline_contour=False, pref_of_dataset=None, min_conf_value_of_textline_text : Optional[float]=None, - logger=None, ): self.model_name = model_name self.tr_ocr = tr_ocr self.export_textline_images_and_text = export_textline_images_and_text self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour self.pref_of_dataset = pref_of_dataset - self.logger = logger if logger else getLogger('eynollah') + self.logger = logging.getLogger('eynollah') if not export_textline_images_and_text: if min_conf_value_of_textline_text: diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py index 9247efe..5e82cbd 100644 --- a/src/eynollah/image_enhancer.py +++ b/src/eynollah/image_enhancer.py @@ -2,7 +2,7 @@ Image enhancer. The output can be written as same scale of input or in new predicted scale. """ -from logging import Logger +import logging import os import time from typing import Optional @@ -11,7 +11,6 @@ import gc import cv2 import numpy as np -from ocrd_utils import getLogger, tf_disable_interactive_logs import tensorflow as tf from skimage.morphology import skeletonize from tensorflow.keras.models import load_model @@ -35,7 +34,6 @@ class Enhancer: num_col_upper : Optional[int] = None, num_col_lower : Optional[int] = None, save_org_scale : bool = False, - logger : Optional[Logger] = None, ): self.input_binary = False self.light_version = False @@ -49,7 +47,7 @@ class Enhancer: else: self.num_col_lower = num_col_lower - self.logger = logger if logger else getLogger('enhancement') + self.logger = logging.getLogger('eynollah.enhancement') self.dir_models = dir_models self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index 1b991ae..7dc3f00 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -1,8 +1,8 @@ """ -Image enhancer. The output can be written as same scale of input or in new predicted scale. +Machine learning based reading order detection """ -from logging import Logger +import logging import os import time from typing import Optional @@ -11,7 +11,6 @@ import xml.etree.ElementTree as ET import cv2 import numpy as np -from ocrd_utils import getLogger import statistics import tensorflow as tf from tensorflow.keras.models import load_model @@ -33,9 +32,9 @@ class machine_based_reading_order_on_layout: def __init__( self, dir_models : str, - logger : Optional[Logger] = None, + logger : Optional[logging.Logger] = None, ): - self.logger = logger if logger else getLogger('mbreorder') + self.logger = logger or logging.getLogger('eynollah.mbreorder') self.dir_models = dir_models self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824" diff --git a/src/eynollah/ocrd_cli_binarization.py b/src/eynollah/ocrd_cli_binarization.py index 848bbac..e5f85b1 100644 --- a/src/eynollah/ocrd_cli_binarization.py +++ b/src/eynollah/ocrd_cli_binarization.py @@ -34,6 +34,7 @@ class SbbBinarizeProcessor(Processor): Set up the model prior to processing. """ # resolve relative path via OCR-D ResourceManager + assert isinstance(self.parameter, dict) model_path = self.resolve_resource(self.parameter['model']) self.binarizer = SbbBinarizer(model_dir=model_path, logger=self.logger) diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index 12c7356..60c136c 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -32,8 +32,8 @@ class EynollahProcessor(Processor): allow_scaling=self.parameter['allow_scaling'], headers_off=self.parameter['headers_off'], tables=self.parameter['tables'], + logger=self.logger ) - self.eynollah.logger = self.logger self.eynollah.plotter = None def shutdown(self): diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py index 3716987..1b46a01 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/sbb_binarize.py @@ -2,19 +2,16 @@ Tool to load model and binarize a given image. """ -import sys from glob import glob import os import logging +from typing import Optional import numpy as np -from PIL import Image import cv2 -from ocrd_utils import tf_disable_interactive_logs -tf_disable_interactive_logs() import tensorflow as tf -from tensorflow.keras.models import load_model -from tensorflow.python.keras import backend as tensorflow_backend +from keras.models import load_model +from keras import backend as tensorflow_backend from .utils import is_image_filename @@ -23,9 +20,13 @@ def resize_image(img_in, input_height, input_width): class SbbBinarizer: - def __init__(self, model_dir, logger=None): + def __init__( + self, + model_dir, + logger: Optional[logging.Logger] = None, + ): self.model_dir = model_dir - self.log = logger if logger else logging.getLogger('SbbBinarizer') + self.logger = logger or logging.getLogger('eynollah.binarize') self.start_new_session() @@ -325,7 +326,7 @@ class SbbBinarizer: image = cv2.imread(image_path) img_last = 0 for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): - self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) + self.logger.debug('Binarizing with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) res = self.predict(model, image, use_patches) @@ -345,17 +346,19 @@ class SbbBinarizer: img_last[:, :][img_last[:, :] > 0] = 255 img_last = (img_last[:, :] == 0) * 255 if output: + self.logger.info('Writing binarized image to %s', output) cv2.imwrite(output, img_last) return img_last else: ls_imgs = list(filter(is_image_filename, os.listdir(dir_in))) - for image_name in ls_imgs: + self.logger.info("Found %d image files to binarize in %s", len(ls_imgs), dir_in) + for i, image_name in enumerate(ls_imgs): image_stem = image_name.split('.')[0] - print(image_name,'image_name') + self.logger.info('Binarizing [%3d/%d] %s', i + 1, len(ls_imgs), image_name) image = cv2.imread(os.path.join(dir_in,image_name) ) img_last = 0 for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): - self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) + self.logger.debug('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) res = self.predict(model, image, use_patches) @@ -375,4 +378,6 @@ class SbbBinarizer: img_last[:, :][img_last[:, :] > 0] = 255 img_last = (img_last[:, :] == 0) * 255 - cv2.imwrite(os.path.join(output, image_stem + '.png'), img_last) + output_filename = os.path.join(output, image_stem + '.png') + self.logger.info('Writing binarized image to %s', output_filename) + cv2.imwrite(output_filename, img_last) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 5ccb2af..9734f93 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -19,7 +19,6 @@ from .contour import (contours_in_same_horizon, find_new_features_of_contours, return_contours_of_image, return_parent_contours) - def pairwise(iterable): # pairwise('ABCDEFG') → AB BC CD DE EF FG diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 9c3456a..52402f8 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -2,11 +2,11 @@ # pylint: disable=import-error from pathlib import Path import os.path +import logging import xml.etree.ElementTree as ET from .utils.xml import create_page_xml, xml_reading_order from .utils.counter import EynollahIdCounter -from ocrd_utils import getLogger from ocrd_models.ocrd_page import ( BorderType, CoordsType, @@ -24,7 +24,7 @@ import numpy as np class EynollahXmlWriter: def __init__(self, *, dir_out, image_filename, curved_line,textline_light, pcgts=None): - self.logger = getLogger('eynollah.writer') + self.logger = logging.getLogger('eynollah.writer') self.counter = EynollahIdCounter() self.dir_out = dir_out self.image_filename = image_filename diff --git a/tests/test_run.py b/tests/test_run.py index 79c64c2..9606706 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -4,11 +4,7 @@ import pytest import logging from PIL import Image from eynollah.cli import ( - layout as layout_cli, - binarization as binarization_cli, - enhancement as enhancement_cli, - machine_based_reading_order as mbreorder_cli, - ocr as ocr_cli, + main as main_cli, ) from click.testing import CliRunner from ocrd_modelfactory import page_from_file @@ -43,18 +39,19 @@ def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' args = [ + 'layout', '-m', MODELS_LAYOUT, '-i', str(infile), '-o', str(outfile.parent), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'eynollah' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(layout_cli, args + options, catch_exceptions=False) + result = runner.invoke(main_cli, args + options, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert str(infile) in logmsgs @@ -78,18 +75,19 @@ def test_run_eynollah_layout_filename2(tmp_path, pytestconfig, caplog, options): infile = testdir.joinpath('resources/euler_rechenkunst01_1738_0025.tif') outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml' args = [ + 'layout', '-m', MODELS_LAYOUT, '-i', str(infile), '-o', str(outfile.parent), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'eynollah' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(layout_cli, args + options, catch_exceptions=False) + result = runner.invoke(main_cli, args + options, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert str(infile) in logmsgs @@ -109,18 +107,19 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ + 'layout', '-m', MODELS_LAYOUT, '-di', str(indir), '-o', str(outdir), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'eynollah' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(layout_cli, args, catch_exceptions=False) + result = runner.invoke(main_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Job done in')]) == 2 @@ -137,18 +136,19 @@ def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, opti infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') args = [ + 'binarization', '-m', MODELS_BIN, '-i', str(infile), '-o', str(outfile), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'SbbBinarizer' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) + result = runner.invoke(main_cli, args + options, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) @@ -163,18 +163,19 @@ def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ + 'binarization', '-m', MODELS_BIN, '-di', str(indir), '-o', str(outdir), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'SbbBinarizer' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(binarization_cli, args, catch_exceptions=False) + result = runner.invoke(main_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 @@ -190,18 +191,19 @@ def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, optio infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') args = [ + 'enhancement', '-m', MODELS_LAYOUT, '-i', str(infile), '-o', str(outfile.parent), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'enhancement' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False) + result = runner.invoke(main_cli, args + options, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs @@ -216,18 +218,19 @@ def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ + 'enhancement', '-m', MODELS_LAYOUT, '-di', str(indir), '-o', str(outdir), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'enhancement' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(enhancement_cli, args, catch_exceptions=False) + result = runner.invoke(main_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2 @@ -237,18 +240,19 @@ def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') args = [ + 'machine-based-reading-order', '-m', MODELS_LAYOUT, '-i', str(infile), '-o', str(outfile.parent), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'mbreorder' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(mbreorder_cli, args, catch_exceptions=False) + result = runner.invoke(main_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] # FIXME: mbreorder has no logging! @@ -266,18 +270,19 @@ def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ + 'machine-based-reading-order', '-m', MODELS_LAYOUT, '-di', str(indir), '-o', str(outdir), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'mbreorder' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(mbreorder_cli, args, catch_exceptions=False) + result = runner.invoke(main_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] # FIXME: mbreorder has no logging! @@ -298,13 +303,14 @@ def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options): outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png') outrenderfile.parent.mkdir() args = [ + 'ocr', '-m', MODELS_OCR, '-i', str(infile), '-dx', str(infile.parent), '-o', str(outfile.parent), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.DEBUG) def only_eynollah(logrec): return logrec.name == 'eynollah' @@ -312,7 +318,7 @@ def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options): if "-doit" in options: options.insert(options.index("-doit") + 1, str(outrenderfile.parent)) with caplog.filtering(only_eynollah): - result = runner.invoke(ocr_cli, args + options, catch_exceptions=False) + result = runner.invoke(main_cli, args + options, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] # FIXME: ocr has no logging! @@ -331,19 +337,20 @@ def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog): indir = testdir.joinpath('resources') outdir = tmp_path args = [ + 'ocr', '-m', MODELS_OCR, '-di', str(indir), '-dx', str(indir), '-o', str(outdir), ] if pytestconfig.getoption('verbose') > 0: - args.extend(['-l', 'DEBUG']) + args = ['-l', 'DEBUG'] + args caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'eynollah' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(ocr_cli, args, catch_exceptions=False) + result = runner.invoke(main_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] # FIXME: ocr has no logging!