add tests for enhancement and mbreorder

This commit is contained in:
Robert Sachunsky 2025-09-25 01:13:48 +02:00
parent 9967510327
commit f07df080f0
3 changed files with 3875 additions and 9 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -2,7 +2,12 @@ from os import environ
from pathlib import Path from pathlib import Path
import logging import logging
from PIL import Image from PIL import Image
from eynollah.cli import layout as layout_cli, binarization as binarization_cli from eynollah.cli import (
layout as layout_cli,
binarization as binarization_cli,
enhancement as enhancement_cli,
machine_based_reading_order as mbreorder_cli,
)
from click.testing import CliRunner from click.testing import CliRunner
from ocrd_modelfactory import page_from_file from ocrd_modelfactory import page_from_file
from ocrd_models.constants import NAMESPACES as NS from ocrd_models.constants import NAMESPACES as NS
@ -44,8 +49,7 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
options=options): options=options):
with caplog.filtering(only_eynollah): with caplog.filtering(only_eynollah):
result = runner.invoke(layout_cli, args + options, catch_exceptions=False) result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
print(result) assert result.exit_code == 0, result.stdout
assert result.exit_code == 0
logmsgs = [logrec.message for logrec in caplog.records] logmsgs = [logrec.message for logrec in caplog.records]
assert str(infile) in logmsgs assert str(infile) in logmsgs
assert outfile.exists() assert outfile.exists()
@ -73,8 +77,7 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
runner = CliRunner() runner = CliRunner()
with caplog.filtering(only_eynollah): with caplog.filtering(only_eynollah):
result = runner.invoke(layout_cli, args) result = runner.invoke(layout_cli, args)
print(result) assert result.exit_code == 0, result.stdout
assert result.exit_code == 0
logmsgs = [logrec.message for logrec in caplog.records] logmsgs = [logrec.message for logrec in caplog.records]
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Job done in')]) == 2 assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Job done in')]) == 2
assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in')) assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in'))
@ -88,6 +91,8 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
'-i', str(infile), '-i', str(infile),
'-o', str(outfile), '-o', str(outfile),
] ]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO) caplog.set_level(logging.INFO)
def only_eynollah(logrec): def only_eynollah(logrec):
return logrec.name == 'SbbBinarizer' return logrec.name == 'SbbBinarizer'
@ -100,8 +105,7 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
options=options): options=options):
with caplog.filtering(only_eynollah): with caplog.filtering(only_eynollah):
result = runner.invoke(binarization_cli, args + options) result = runner.invoke(binarization_cli, args + options)
print(result) assert result.exit_code == 0, result.stdout
assert result.exit_code == 0
logmsgs = [logrec.message for logrec in caplog.records] logmsgs = [logrec.message for logrec in caplog.records]
assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
assert outfile.exists() assert outfile.exists()
@ -119,14 +123,121 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c
'-di', str(indir), '-di', str(indir),
'-o', str(outdir), '-o', str(outdir),
] ]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO) caplog.set_level(logging.INFO)
def only_eynollah(logrec): def only_eynollah(logrec):
return logrec.name == 'SbbBinarizer' return logrec.name == 'SbbBinarizer'
runner = CliRunner() runner = CliRunner()
with caplog.filtering(only_eynollah): with caplog.filtering(only_eynollah):
result = runner.invoke(binarization_cli, args) result = runner.invoke(binarization_cli, args)
print(result) assert result.exit_code == 0, result.stdout
assert result.exit_code == 0
logmsgs = [logrec.message for logrec in caplog.records] logmsgs = [logrec.message for logrec in caplog.records]
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog):
    """Run the enhancement CLI on a single image, with and without ``-sos``.

    Each option set runs as its own subtest.  The CLI must exit with status 0,
    emit a log message starting with 'Image was enhanced' and leave the PNG
    result in ``tmp_path``.  The result must have the same pixel size as the
    input exactly when ``-sos`` was passed.
    """
    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
    outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
    args = [
        '-m', EYNOLLAH_MODELS,
        '-i', str(infile),
        '-o', str(outfile.parent),
        # subtests write to same location
        '--overwrite',
    ]
    # forward pytest's -v to the CLI as debug logging
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)

    def only_eynollah(logrec):
        # keep only records from the logger named 'enhancement'
        return logrec.name == 'enhancement'

    runner = CliRunner()
    for options in [
            [],  # defaults
            ["-sos"],
    ]:
        with subtests.test(options=options):
            # caplog accumulates records for the whole test function; without
            # clearing, messages captured by an earlier subtest would satisfy
            # the log assertion of this one
            caplog.clear()
            with caplog.filtering(only_eynollah):
                result = runner.invoke(enhancement_cli, args + options)
            assert result.exit_code == 0, result.stdout
            logmsgs = [logrec.message for logrec in caplog.records]
            assert any(logmsg.startswith('Image was enhanced')
                       for logmsg in logmsgs), logmsgs
            assert outfile.exists()
            with Image.open(infile) as original_img:
                original_size = original_img.size
            with Image.open(outfile) as enhanced_img:
                enhanced_size = enhanced_img.size
            # sizes must match if and only if -sos was requested
            assert (original_size == enhanced_size) == ("-sos" in options)
def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog):
    """Run the enhancement CLI in directory mode on the test resources.

    The CLI must exit with status 0, emit exactly two log messages starting
    with 'Image was enhanced' and leave exactly two entries in ``tmp_path``.
    """
    cli_args = [
        '-m', EYNOLLAH_MODELS,
        '-di', str(testdir.joinpath('resources')),
        '-o', str(tmp_path),
    ]
    # forward pytest's -v to the CLI as debug logging
    if pytestconfig.getoption('verbose') > 0:
        cli_args += ['-l', 'DEBUG']
    caplog.set_level(logging.INFO)
    # capture only records from the logger named 'enhancement'
    with caplog.filtering(lambda record: record.name == 'enhancement'):
        result = CliRunner().invoke(enhancement_cli, cli_args)
    assert result.exit_code == 0, result.stdout
    enhanced_msgs = [
        record.message
        for record in caplog.records
        if record.message.startswith('Image was enhanced')
    ]
    assert len(enhanced_msgs) == 2
    written = list(tmp_path.iterdir())
    assert len(written) == 2
def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog):
    """Run the machine-based reading order CLI on a single XML file.

    The CLI must exit with status 0 and write a file of the same name into
    ``tmp_path``.  The ``regionRef`` attributes below ``page:OrderedGroup`` in
    that file must come out in the pinned order.
    """
    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')
    outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
    args = [
        '-m', EYNOLLAH_MODELS,
        '-i', str(infile),
        '-o', str(outfile.parent),
    ]
    # forward pytest's -v to the CLI as debug logging
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)

    def only_eynollah(logrec):
        # keep only records from the logger named 'mbreorder'
        return logrec.name == 'mbreorder'

    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(mbreorder_cli, args)
    assert result.exit_code == 0, result.stdout
    # FIXME: mbreorder has no logging, so there is no log message to assert on
    assert outfile.exists()
    out_tree = page_from_file(str(outfile)).etree
    out_order = out_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
    assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']
def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog):
    """Run the machine-based reading order CLI in directory mode.

    The CLI must exit with status 0 and leave exactly two entries in
    ``tmp_path``.
    """
    indir = testdir.joinpath('resources')
    outdir = tmp_path
    args = [
        '-m', EYNOLLAH_MODELS,
        '-di', str(indir),
        '-o', str(outdir),
    ]
    # forward pytest's -v to the CLI as debug logging
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)

    def only_eynollah(logrec):
        # keep only records from the logger named 'mbreorder'
        return logrec.name == 'mbreorder'

    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(mbreorder_cli, args)
    assert result.exit_code == 0, result.stdout
    # FIXME: mbreorder has no logging, so there is no log message to count
    assert len(list(outdir.iterdir())) == 2