# eynollah/tests/test_run.py

from os import environ
from pathlib import Path
import pytest
import logging
from PIL import Image
from eynollah.cli import (
    layout as layout_cli,
    binarization as binarization_cli,
    enhancement as enhancement_cli,
    machine_based_reading_order as mbreorder_cli,
    ocr as ocr_cli,
)
from click.testing import CliRunner
from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS

# Directory holding this test module; test resources are located relative to it.
testdir = Path(__file__).parent.resolve()

# Model locations. Each one can be overridden through the environment variable
# of the same name; otherwise a directory next to the tests directory is used.
MODELS_LAYOUT = environ.get(
    'MODELS_LAYOUT',
    str((testdir / '..' / 'models_layout_v0_5_0').resolve()),
)
MODELS_OCR = environ.get(
    'MODELS_OCR',
    str((testdir / '..' / 'models_ocr_v0_5_0').resolve()),
)
MODELS_BIN = environ.get(
    'MODELS_BIN',
    str((testdir / '..' / 'default-2021-03-09').resolve()),
)

def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
    """Run the layout CLI on a single image with several option sets.

    Every option set is executed as its own subtest. For each one the test
    checks the exit code, that the input file name was logged by the
    ``eynollah`` logger, and that the resulting XML contains at least two
    text regions, at least two separator regions and exactly 31 text lines.
    """
    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
    outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
    args = [
        '-m', MODELS_LAYOUT,
        '-i', str(infile),
        '-o', str(outfile.parent),
        # subtests write to same location
        '--overwrite',
    ]
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
    def only_eynollah(logrec):
        # keep only records emitted by the 'eynollah' logger
        return logrec.name == 'eynollah'
    runner = CliRunner()
    for options in [
            [], # defaults
            ["--allow_scaling", "--curved-line"],
            ["--allow_scaling", "--curved-line", "--full-layout"],
            ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"],
            ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based",
             "--textline_light", "--light_version"],
            # -ep ...
            # -eoi ...
            # --do_ocr
            # --skip_layout_and_reading_order
    ]:
        with subtests.test(#msg="test CLI",
                           options=options):
            # caplog accumulates records for the whole test function; drop the
            # ones from earlier subtests so the log assertion below can only
            # be satisfied by this invocation
            caplog.clear()
            with caplog.filtering(only_eynollah):
                result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
            assert result.exit_code == 0, result.stdout
            logmsgs = [logrec.message for logrec in caplog.records]
            assert str(infile) in logmsgs
            assert outfile.exists()
            tree = page_from_file(str(outfile)).etree
            regions = tree.xpath("//page:TextRegion", namespaces=NS)
            assert len(regions) >= 2, "result is inaccurate"
            regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
            assert len(regions) >= 2, "result is inaccurate"
            lines = tree.xpath("//page:TextLine", namespaces=NS)
            assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line

def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
    """Run the layout CLI in directory mode over the test resources.

    Asserts a zero exit code, exactly two 'Job done in' log messages, at
    least one 'All jobs done in' message (all from the ``eynollah`` logger)
    and exactly two entries in the output directory.
    """
    source_dir = testdir / 'resources'
    cli_args = ['-m', MODELS_LAYOUT, '-di', str(source_dir), '-o', str(tmp_path)]
    if pytestconfig.getoption('verbose') > 0:
        cli_args += ['-l', 'DEBUG']
    caplog.set_level(logging.INFO)

    # only records from the 'eynollah' logger are captured during the run
    with caplog.filtering(lambda record: record.name == 'eynollah'):
        result = CliRunner().invoke(layout_cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0, result.stdout

    messages = [record.message for record in caplog.records]
    finished_jobs = sum(1 for msg in messages if msg.startswith('Job done in'))
    assert finished_jobs == 2
    assert any(msg.startswith('All jobs done in') for msg in messages)
    produced = list(tmp_path.iterdir())
    assert len(produced) == 2

def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog):
    """Run the binarization CLI on a single image, with and without patches.

    Every option set is executed as its own subtest. For each one the test
    checks the exit code, that the ``SbbBinarizer`` logger emitted a message
    starting with 'Predicting', that the output image exists and that it has
    the same pixel size as the input image.
    """
    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
    outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
    args = [
        '-m', MODELS_BIN,
        '-i', str(infile),
        '-o', str(outfile),
    ]
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
    def only_binarizer(logrec):
        # keep only records emitted by the 'SbbBinarizer' logger
        return logrec.name == 'SbbBinarizer'
    runner = CliRunner()
    for options in [
            [], # defaults
            ["--no-patches"],
    ]:
        with subtests.test(#msg="test CLI",
                           options=options):
            # caplog accumulates records for the whole test function; drop the
            # ones from earlier subtests so the log assertion below can only
            # be satisfied by this invocation
            caplog.clear()
            with caplog.filtering(only_binarizer):
                result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
            assert result.exit_code == 0, result.stdout
            logmsgs = [logrec.message for logrec in caplog.records]
            assert any(logmsg.startswith('Predicting') for logmsg in logmsgs)
            assert outfile.exists()
            with Image.open(infile) as original_img:
                original_size = original_img.size
            with Image.open(outfile) as binarized_img:
                binarized_size = binarized_img.size
            assert original_size == binarized_size

def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog):
    """Run the binarization CLI in directory mode over the test resources.

    Asserts a zero exit code, exactly two log messages starting with
    'Predicting' from the ``SbbBinarizer`` logger, and exactly two entries
    in the output directory.
    """
    source_dir = testdir / 'resources'
    cli_args = ['-m', MODELS_BIN, '-di', str(source_dir), '-o', str(tmp_path)]
    if pytestconfig.getoption('verbose') > 0:
        cli_args += ['-l', 'DEBUG']
    caplog.set_level(logging.INFO)

    # only records from the 'SbbBinarizer' logger are captured during the run
    with caplog.filtering(lambda record: record.name == 'SbbBinarizer'):
        result = CliRunner().invoke(binarization_cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0, result.stdout

    messages = [record.message for record in caplog.records]
    predictions = sum(1 for msg in messages if msg.startswith('Predicting'))
    assert predictions == 2
    produced = list(tmp_path.iterdir())
    assert len(produced) == 2

def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog):
    """Run the enhancement CLI on a single image, with and without ``-sos``.

    Every option set is executed as its own subtest. For each one the test
    checks the exit code, that the ``enhancement`` logger emitted a message
    starting with 'Image was enhanced', and that the output image exists.
    The output is expected to have the input's pixel size if and only if
    ``-sos`` was passed.
    """
    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
    outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
    args = [
        '-m', MODELS_LAYOUT,
        '-i', str(infile),
        '-o', str(outfile.parent),
        # subtests write to same location
        '--overwrite',
    ]
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
    def only_enhancement(logrec):
        # keep only records emitted by the 'enhancement' logger
        return logrec.name == 'enhancement'
    runner = CliRunner()
    for options in [
            [], # defaults
            ["-sos"],
    ]:
        with subtests.test(#msg="test CLI",
                           options=options):
            # caplog accumulates records for the whole test function; drop the
            # ones from earlier subtests so the log assertion below can only
            # be satisfied by this invocation
            caplog.clear()
            with caplog.filtering(only_enhancement):
                result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
            assert result.exit_code == 0, result.stdout
            logmsgs = [logrec.message for logrec in caplog.records]
            assert any(logmsg.startswith('Image was enhanced') for logmsg in logmsgs), logmsgs
            assert outfile.exists()
            with Image.open(infile) as original_img:
                original_size = original_img.size
            with Image.open(outfile) as enhanced_img:
                enhanced_size = enhanced_img.size
            # sizes must match exactly when -sos is given, and differ otherwise
            assert (original_size == enhanced_size) == ("-sos" in options)

def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
    """Run the enhancement CLI in directory mode over the test resources.

    Asserts a zero exit code, exactly two log messages starting with
    'Image was enhanced' from the ``enhancement`` logger, and exactly two
    entries in the output directory.
    """
    source_dir = testdir / 'resources'
    cli_args = ['-m', MODELS_LAYOUT, '-di', str(source_dir), '-o', str(tmp_path)]
    if pytestconfig.getoption('verbose') > 0:
        cli_args += ['-l', 'DEBUG']
    caplog.set_level(logging.INFO)

    # only records from the 'enhancement' logger are captured during the run
    with caplog.filtering(lambda record: record.name == 'enhancement'):
        result = CliRunner().invoke(enhancement_cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0, result.stdout

    messages = [record.message for record in caplog.records]
    enhanced = sum(1 for msg in messages if msg.startswith('Image was enhanced'))
    assert enhanced == 2
    produced = list(tmp_path.iterdir())
    assert len(produced) == 2

def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
    """Run the machine-based reading order CLI on a single XML file.

    Asserts a zero exit code, that the output file exists and that the
    ``regionRef`` attributes below ``OrderedGroup`` in the output are exactly
    ``['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']``.
    """
    source_xml = testdir / 'resources/kant_aufklaerung_1784_0020.xml'
    result_xml = tmp_path / 'kant_aufklaerung_1784_0020.xml'
    cli_args = ['-m', MODELS_LAYOUT, '-i', str(source_xml), '-o', str(result_xml.parent)]
    if pytestconfig.getoption('verbose') > 0:
        cli_args += ['-l', 'DEBUG']
    caplog.set_level(logging.INFO)

    # only records from the 'mbreorder' logger are captured during the run
    with caplog.filtering(lambda record: record.name == 'mbreorder'):
        result = CliRunner().invoke(mbreorder_cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0, result.stdout

    logmsgs = [record.message for record in caplog.records]
    # FIXME: mbreorder has no logging!
    #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
    assert result_xml.exists()
    #in_tree = page_from_file(str(source_xml)).etree
    #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
    reordered = page_from_file(str(result_xml)).etree.xpath(
        "//page:OrderedGroup//@regionRef", namespaces=NS)
    #assert len(reordered) >= 2, "result is inaccurate"
    #assert in_order != reordered
    expected_order = ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']
    assert reordered == expected_order

def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
    """Run the machine-based reading order CLI in directory mode.

    Asserts a zero exit code and exactly two entries in the output
    directory. No log assertions are made yet (see FIXME below).
    """
    source_dir = testdir / 'resources'
    cli_args = ['-m', MODELS_LAYOUT, '-di', str(source_dir), '-o', str(tmp_path)]
    if pytestconfig.getoption('verbose') > 0:
        cli_args += ['-l', 'DEBUG']
    caplog.set_level(logging.INFO)

    # only records from the 'mbreorder' logger are captured during the run
    with caplog.filtering(lambda record: record.name == 'mbreorder'):
        result = CliRunner().invoke(mbreorder_cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0, result.stdout

    logmsgs = [record.message for record in caplog.records]
    # FIXME: mbreorder has no logging!
    #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
    produced = list(tmp_path.iterdir())
    assert len(produced) == 2

def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
    """Run the OCR CLI on a single image, one subtest per option set.

    Only the ``-trocr`` option set is currently active. For each option set
    the test checks the exit code, that the output XML exists (plus the
    rendered image when ``-doit`` is among the options), and that the output
    holds at least two text line results with more than 100 characters in
    total.
    """
    source_img = testdir / 'resources/kant_aufklaerung_1784_0020.tif'
    result_xml = tmp_path / 'kant_aufklaerung_1784_0020.xml'
    render_dir = tmp_path / 'render'
    outrenderfile = render_dir / 'kant_aufklaerung_1784_0020.png'
    render_dir.mkdir()
    base_args = [
        '-m', MODELS_OCR,
        '-i', str(source_img),
        '-dx', str(source_img.parent),
        '-o', str(result_xml.parent),
        # subtests write to same location
        '--overwrite',
    ]
    if pytestconfig.getoption('verbose') > 0:
        base_args += ['-l', 'DEBUG']
    caplog.set_level(logging.DEBUG)

    def from_eynollah(record):
        # keep only records emitted by the 'eynollah' logger
        return record.name == 'eynollah'

    option_sets = [
        # kba  Fri Sep 26 12:53:49 CEST 2025
        # Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
        # [], # defaults
        # ["-doit", str(outrenderfile.parent)],
        ["-trocr"],
    ]
    cli = CliRunner()
    for options in option_sets:
        with subtests.test(#msg="test CLI",
                           options=options):
            with caplog.filtering(from_eynollah):
                result = cli.invoke(ocr_cli, [*base_args, *options], catch_exceptions=False)
            assert result.exit_code == 0, result.stdout
            logmsgs = [record.message for record in caplog.records]
            # FIXME: ocr has no logging!
            #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
            assert result_xml.exists()
            if "-doit" in options:
                assert outrenderfile.exists()
            #in_tree = page_from_file(str(source_img)).etree
            #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
            recognized = page_from_file(str(result_xml)).etree.xpath(
                "//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
            assert len(recognized) >= 2, ("result is inaccurate", recognized)
            total_chars = sum(len(text) for text in recognized)
            assert total_chars > 100, ("result is inaccurate", recognized)

# kba  Fri Sep 26 12:53:49 CEST 2025
# Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
# def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
#     indir = testdir.joinpath('resources')
#     outdir = tmp_path
#     args = [
#         '-m', MODELS_OCR,
#         '-di', str(indir),
#         '-dx', str(indir),
#         '-o', str(outdir),
#     ]
#     if pytestconfig.getoption('verbose') > 0:
#         args.extend(['-l', 'DEBUG'])
#     caplog.set_level(logging.INFO)
#     def only_eynollah(logrec):
#         return logrec.name == 'eynollah'
#     runner = CliRunner()
#     with caplog.filtering(only_eynollah):
#         result = runner.invoke(ocr_cli, args, catch_exceptions=False)
#     assert result.exit_code == 0, result.stdout
#     logmsgs = [logrec.message for logrec in caplog.records]
#     # FIXME: ocr has no logging!
#     #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
#     assert len(list(outdir.iterdir())) == 2
do an actual test run 2021-02-04 15:21:14 +01:00			`from os import environ`
			`from pathlib import Path`
tests: also disable ...ocr_directory test 2025-09-26 13:57:08 +02:00			`import pytest`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`import logging`
			`from PIL import Image`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`from eynollah.cli import (`
			`layout as layout_cli,`
			`binarization as binarization_cli,`
			`enhancement as enhancement_cli,`
			`machine_based_reading_order as mbreorder_cli,`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`ocr as ocr_cli,`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`)`
pytest: use subtests for various layout options, add coverage 2025-04-04 22:22:50 +02:00			`from click.testing import CliRunner`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`from ocrd_modelfactory import page_from_file`
			`from ocrd_models.constants import NAMESPACES as NS`
do an actual test run 2021-02-04 15:21:14 +01:00
			`testdir = Path(__file__).parent.resolve()`

tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve()))`
			`MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))`
			`MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))`
do an actual test run 2021-02-04 15:21:14 +01:00
pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')`
			`outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'`
pytest: use subtests for various layout options, add coverage 2025-04-04 22:22:50 +02:00			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_LAYOUT,`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`'-i', str(infile),`
			`'-o', str(outfile.parent),`
pytest: use subtests for various layout options, add coverage 2025-04-04 22:22:50 +02:00			`# subtests write to same location`
			`'--overwrite',`
			`]`
			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`caplog.set_level(logging.INFO)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'eynollah'`
pytest: use subtests for various layout options, add coverage 2025-04-04 22:22:50 +02:00			`runner = CliRunner()`
			`for options in [`
			`[], # defaults`
			`["--allow_scaling", "--curved-line"],`
			`["--allow_scaling", "--curved-line", "--full-layout"],`
			`["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"],`
			`["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based",`
			`"--textline_light", "--light_version"],`
			`# -ep ...`
			`# -eoi ...`
			`# --do_ocr`
			`# --skip_layout_and_reading_order`
			`]:`
			`with subtests.test(#msg="test CLI",`
			`options=options):`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`with caplog.filtering(only_eynollah):`
test_run: ensure exceptions are shown 2025-04-06 18:24:56 +00:00			`result = runner.invoke(layout_cli, args + options, catch_exceptions=False)`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`assert result.exit_code == 0, result.stdout`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`logmsgs = [logrec.message for logrec in caplog.records]`
			`assert str(infile) in logmsgs`
			`assert outfile.exists()`
			`tree = page_from_file(str(outfile)).etree`
			`regions = tree.xpath("//page:TextRegion", namespaces=NS)`
			`assert len(regions) >= 2, "result is inaccurate"`
			`regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)`
			`assert len(regions) >= 2, "result is inaccurate"`
			`lines = tree.xpath("//page:TextLine", namespaces=NS)`
			`assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line`

pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):`
			`indir = testdir.joinpath('resources')`
			`outdir = tmp_path`
			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_LAYOUT,`
pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`'-di', str(indir),`
			`'-o', str(outdir),`
			`]`
			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
			`caplog.set_level(logging.INFO)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'eynollah'`
			`runner = CliRunner()`
			`with caplog.filtering(only_eynollah):`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`result = runner.invoke(layout_cli, args, catch_exceptions=False)`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`assert result.exit_code == 0, result.stdout`
pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`logmsgs = [logrec.message for logrec in caplog.records]`
			`assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Job done in')]) == 2`
			`assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in'))`
			`assert len(list(outdir.iterdir())) == 2`

			`def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog):`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')`
			`outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')`
			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_BIN,`
resolving tests error 2025-07-23 16:44:17 +02:00			`'-i', str(infile),`
			`'-o', str(outfile),`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`]`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`caplog.set_level(logging.INFO)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'SbbBinarizer'`
			`runner = CliRunner()`
			`for options in [`
			`[], # defaults`
			`["--no-patches"],`
			`]:`
			`with subtests.test(#msg="test CLI",`
			`options=options):`
			`with caplog.filtering(only_eynollah):`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`assert result.exit_code == 0, result.stdout`
pytest: add asserts for results, add binarization 2025-04-04 23:37:00 +02:00			`logmsgs = [logrec.message for logrec in caplog.records]`
			`assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))`
			`assert outfile.exists()`
			`with Image.open(infile) as original_img:`
			`original_size = original_img.size`
			`with Image.open(outfile) as binarized_img:`
			`binarized_size = binarized_img.size`
			`assert original_size == binarized_size`

pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog):`
			`indir = testdir.joinpath('resources')`
			`outdir = tmp_path`
			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_BIN,`
pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`'-di', str(indir),`
resolving tests error 2025-07-23 16:44:17 +02:00			`'-o', str(outdir),`
pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`]`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`caplog.set_level(logging.INFO)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'SbbBinarizer'`
			`runner = CliRunner()`
			`with caplog.filtering(only_eynollah):`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`result = runner.invoke(binarization_cli, args, catch_exceptions=False)`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`assert result.exit_code == 0, result.stdout`
pytest: add tests for directory mode (layout+bin) 2025-04-04 23:48:30 +02:00			`logmsgs = [logrec.message for logrec in caplog.records]`
			`assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2`
			`assert len(list(outdir.iterdir())) == 2`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00
			`def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog):`
			`infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')`
			`outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')`
			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_LAYOUT,`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`'-i', str(infile),`
			`'-o', str(outfile.parent),`
			`# subtests write to same location`
			`'--overwrite',`
			`]`
			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
			`caplog.set_level(logging.INFO)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'enhancement'`
			`runner = CliRunner()`
			`for options in [`
			`[], # defaults`
			`["-sos"],`
			`]:`
			`with subtests.test(#msg="test CLI",`
			`options=options):`
			`with caplog.filtering(only_eynollah):`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`assert result.exit_code == 0, result.stdout`
			`logmsgs = [logrec.message for logrec in caplog.records]`
			`assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs`
			`assert outfile.exists()`
			`with Image.open(infile) as original_img:`
			`original_size = original_img.size`
			`with Image.open(outfile) as enhanced_img:`
			`enhanced_size = enhanced_img.size`
			`assert (original_size == enhanced_size) == ("-sos" in options)`

			`def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):`
			`indir = testdir.joinpath('resources')`
			`outdir = tmp_path`
			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_LAYOUT,`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`'-di', str(indir),`
			`'-o', str(outdir),`
			`]`
			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
			`caplog.set_level(logging.INFO)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'enhancement'`
			`runner = CliRunner()`
			`with caplog.filtering(only_eynollah):`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`result = runner.invoke(enhancement_cli, args, catch_exceptions=False)`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`assert result.exit_code == 0, result.stdout`
			`logmsgs = [logrec.message for logrec in caplog.records]`
			`assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2`
			`assert len(list(outdir.iterdir())) == 2`

			`def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):`
			`infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')`
			`outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')`
			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_LAYOUT,`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`'-i', str(infile),`
			`'-o', str(outfile.parent),`
			`]`
			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
			`caplog.set_level(logging.INFO)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'mbreorder'`
			`runner = CliRunner()`
			`with caplog.filtering(only_eynollah):`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`result = runner.invoke(mbreorder_cli, args, catch_exceptions=False)`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`assert result.exit_code == 0, result.stdout`
			`logmsgs = [logrec.message for logrec in caplog.records]`
			`# FIXME: mbreorder has no logging!`
			`#assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs`
			`assert outfile.exists()`
			`#in_tree = page_from_file(str(infile)).etree`
			`#in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)`
			`out_tree = page_from_file(str(outfile)).etree`
			`out_order = out_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)`
			`#assert len(out_order) >= 2, "result is inaccurate"`
			`#assert in_order != out_order`
			`assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']`

			`def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):`
			`indir = testdir.joinpath('resources')`
			`outdir = tmp_path`
			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_LAYOUT,`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`'-di', str(indir),`
			`'-o', str(outdir),`
			`]`
			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
			`caplog.set_level(logging.INFO)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'mbreorder'`
			`runner = CliRunner()`
			`with caplog.filtering(only_eynollah):`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`result = runner.invoke(mbreorder_cli, args, catch_exceptions=False)`
add tests for enhancement and mbreorder 2025-09-25 01:13:48 +02:00			`assert result.exit_code == 0, result.stdout`
			`logmsgs = [logrec.message for logrec in caplog.records]`
			`# FIXME: mbreorder has no logging!`
			`#assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2`
			`assert len(list(outdir.iterdir())) == 2`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00
			`def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):`
			`infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')`
			`outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')`
test_run: make ocr -doit work (add truetype file) 2025-09-25 22:25:05 +02:00			`outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`outrenderfile.parent.mkdir()`
			`args = [`
tests: adapt to layout/ocr model split 2025-09-25 21:47:15 +02:00			`'-m', MODELS_OCR,`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`'-i', str(infile),`
			`'-dx', str(infile.parent),`
			`'-o', str(outfile.parent),`
			`# subtests write to same location`
			`'--overwrite',`
			`]`
			`if pytestconfig.getoption('verbose') > 0:`
			`args.extend(['-l', 'DEBUG'])`
			`caplog.set_level(logging.DEBUG)`
			`def only_eynollah(logrec):`
			`return logrec.name == 'eynollah'`
			`runner = CliRunner()`
			`for options in [`
disable the -doit OCR test 2025-09-26 12:54:29 +02:00			`# kba Fri Sep 26 12:53:49 CEST 2025`
tests: also disable ...ocr_directory test 2025-09-26 13:57:08 +02:00			`# Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged`
disable the -doit OCR test 2025-09-26 12:54:29 +02:00			`# [], # defaults`
			`# ["-doit", str(outrenderfile.parent)],`
test_run: add tests for ocr 2025-09-25 19:53:19 +02:00			`["-trocr"],`
			`]:`
			`with subtests.test(#msg="test CLI",`
			`options=options):`
			`with caplog.filtering(only_eynollah):`
			`result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)`
			`assert result.exit_code == 0, result.stdout`
			`logmsgs = [logrec.message for logrec in caplog.records]`
			`# FIXME: ocr has no logging!`
			`#assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs`
			`assert outfile.exists()`
			`if "-doit" in options:`
			`assert outrenderfile.exists()`
			`#in_tree = page_from_file(str(infile)).etree`
			`#in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)`
			`out_tree = page_from_file(str(outfile)).etree`
			`out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)`
			`assert len(out_texts) >= 2, ("result is inaccurate", out_texts)`
			`assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)`

comment out the offending test outright 2025-09-26 14:37:04 +02:00			`# kba Fri Sep 26 12:53:49 CEST 2025`
			`# Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged`
			`# def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):`
			`# indir = testdir.joinpath('resources')`
			`# outdir = tmp_path`
			`# args = [`
			`# '-m', MODELS_OCR,`
			`# '-di', str(indir),`
			`# '-dx', str(indir),`
			`# '-o', str(outdir),`
			`# ]`
			`# if pytestconfig.getoption('verbose') > 0:`
			`# args.extend(['-l', 'DEBUG'])`
			`# caplog.set_level(logging.INFO)`
			`# def only_eynollah(logrec):`
			`# return logrec.name == 'eynollah'`
			`# runner = CliRunner()`
			`# with caplog.filtering(only_eynollah):`
			`# result = runner.invoke(ocr_cli, args, catch_exceptions=False)`
			`# assert result.exit_code == 0, result.stdout`
			`# logmsgs = [logrec.message for logrec in caplog.records]`
			`# # FIXME: ocr has no logging!`
			`# #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs`
			`# assert len(list(outdir.iterdir())) == 2`