From a3e1b3d4d5612aa77660f9b56940f9e67f2b84eb Mon Sep 17 00:00:00 2001
From: Robert Sachunsky
Date: Fri, 4 Apr 2025 23:37:00 +0200
Subject: [PATCH] pytest: add asserts for results, add binarization

Makefile: export EYNOLLAH_MODELS and SBBBIN_MODELS as target-specific
variables of `test` instead of prefixing the pytest command line.

tests/test_run.py: rename test_full_run to test_run_eynollah_layout and
assert on the captured log records and on the regions and lines found in
the resulting PAGE-XML; add test_run_eynollah_binarization, which checks
the captured log records and that the binarized image has the same size
as the input image.
---
Reviewer notes (this section is not part of the commit message):

* Line structure and indentation were rebuilt from whitespace-collapsed
  text. Hunk line counts were cross-checked against the hunk headers, but
  the indentation of context lines is inferred: run `git apply --check`
  before relying on this file.
* The From: header carried no e-mail address in the text this was rebuilt
  from; restore it before feeding the file to `git am`.
* Changes against the patch as originally posted:
  - each subtest now calls caplog.clear() and removes the output file
    before invoking the CLI, so that the log and existence assertions
    cannot be satisfied by leftovers of an earlier subtest;
  - `any(True for m in msgs if cond)` became `any(cond for m in msgs)`;
  - the log filter of the binarization test, which selects the
    'SbbBinarizer' logger, is no longer called only_eynollah;
  - both tests got a docstring.
  Hunk headers and diffstat were adjusted accordingly; the post-image blob
  id on the tests/test_run.py index line is still the one of the original
  patch.

 Makefile          |  4 ++-
 tests/test_run.py | 74 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 70 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 516dd1a..d910596 100644
--- a/Makefile
+++ b/Makefile
@@ -105,8 +105,10 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
 	$(RM) -r $(TMPDIR)
 
 # Run unit tests
+test: export EYNOLLAH_MODELS=$(CURDIR)/models_eynollah
+test: export SBBBIN_MODELS=$(CURDIR)/default-2021-03-09
 test:
-	EYNOLLAH_MODELS=$(CURDIR)/models_eynollah $(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)
+	$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)
 
 coverage:
 	coverage erase
diff --git a/tests/test_run.py b/tests/test_run.py
index 5320637..741c544 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -1,22 +1,33 @@
 from os import environ
 from pathlib import Path
-from eynollah.cli import layout as eynollah_cli
+import logging
+from PIL import Image
+from eynollah.cli import layout as layout_cli, binarization as binarization_cli
 from click.testing import CliRunner
+from ocrd_modelfactory import page_from_file
+from ocrd_models.constants import NAMESPACES as NS
 
 testdir = Path(__file__).parent.resolve()
 
 EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve()))
+SBBBIN_MODELS = environ.get('SBBBIN_MODELS', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
 
-def test_full_run(tmpdir, subtests, pytestconfig):
+def test_run_eynollah_layout(tmp_path, subtests, pytestconfig, caplog):
+    """Run the layout CLI with several option sets and check the PAGE-XML result."""
+    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
+    outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
     args = [
         '-m', EYNOLLAH_MODELS,
-        '-i', str(testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')),
-        '-o', tmpdir,
+        '-i', str(infile),
+        '-o', str(outfile.parent),
         # subtests write to same location
         '--overwrite',
     ]
     if pytestconfig.getoption('verbose') > 0:
         args.extend(['-l', 'DEBUG'])
+    caplog.set_level(logging.INFO)
+    def only_eynollah(logrec):
+        return logrec.name == 'eynollah'
     runner = CliRunner()
     for options in [
             [], # defaults
@@ -32,8 +43,57 @@ def test_full_run(tmpdir, subtests, pytestconfig):
     ]:
         with subtests.test(#msg="test CLI",
                            options=options):
-            result = runner.invoke(eynollah_cli, args + options)
+            # start clean: earlier subtests leave log records and the output file behind
+            caplog.clear()
+            outfile.unlink(missing_ok=True)
+            with caplog.filtering(only_eynollah):
+                result = runner.invoke(layout_cli, args + options)
             print(result)
-            print(result.output)
             assert result.exit_code == 0
-            assert 'kant_aufklaerung_1784_0020.tif' in result.output
+            logmsgs = [logrec.message for logrec in caplog.records]
+            assert str(infile) in logmsgs
+            assert outfile.exists()
+            tree = page_from_file(str(outfile)).etree
+            regions = tree.xpath("//page:TextRegion", namespaces=NS)
+            assert len(regions) >= 2, "result is inaccurate"
+            regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
+            assert len(regions) >= 2, "result is inaccurate"
+            lines = tree.xpath("//page:TextLine", namespaces=NS)
+            assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
+
+def test_run_eynollah_binarization(tmp_path, subtests, pytestconfig, caplog):
+    """Run the binarization CLI and check that the output image keeps the input size."""
+    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
+    outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
+    args = [
+        '-m', SBBBIN_MODELS,
+        str(infile),
+        str(outfile),
+    ]
+    caplog.set_level(logging.INFO)
+    def only_sbb_binarizer(logrec):
+        return logrec.name == 'SbbBinarizer'
+    runner = CliRunner()
+    for options in [
+            [], # defaults
+            ["--no-patches"],
+            # --dir_in --dir_out
+    ]:
+        with subtests.test(#msg="test CLI",
+                           options=options):
+            # start clean: earlier subtests leave log records and the output file behind
+            caplog.clear()
+            outfile.unlink(missing_ok=True)
+            with caplog.filtering(only_sbb_binarizer):
+                result = runner.invoke(binarization_cli, args + options)
+            print(result)
+            assert result.exit_code == 0
+            logmsgs = [logrec.message for logrec in caplog.records]
+            assert any(logmsg.startswith('Predicting') for logmsg in logmsgs)
+            assert outfile.exists()
+            with Image.open(infile) as original_img:
+                original_size = original_img.size
+            with Image.open(outfile) as binarized_img:
+                binarized_size = binarized_img.size
+            assert original_size == binarized_size
+