pytest: add asserts for results, add binarization

2025-08-02 14:49:54 +02:00 · 2025-04-04 23:37:00 +02:00 · 2025-04-04 23:37:00 +02:00 · a3e1b3d4d5
commit a3e1b3d4d5
parent b03116f4a6
2 changed files with 62 additions and 8 deletions
--- a/4
+++ b/4
@ -105,8 +105,10 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
 	$(RM) -r $(TMPDIR)

 # Run unit tests
+test: export EYNOLLAH_MODELS=$(CURDIR)/models_eynollah
+test: export SBBBIN_MODELS=$(CURDIR)/default-2021-03-09
 test:
-	EYNOLLAH_MODELS=$(CURDIR)/models_eynollah $(PYTHON) -m pytest tests  --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)
+	$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)

 coverage:
 	coverage erase
--- a/tests/test_run.py
+++ b/tests/test_run.py
@ -1,22 +1,32 @@
 from os import environ
 from pathlib import Path
-from eynollah.cli import layout as eynollah_cli
+import logging
+from PIL import Image
+from eynollah.cli import layout as layout_cli, binarization as binarization_cli
 from click.testing import CliRunner
+from ocrd_modelfactory import page_from_file
+from ocrd_models.constants import NAMESPACES as NS

 testdir = Path(__file__).parent.resolve()

 EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve()))
+SBBBIN_MODELS = environ.get('SBBBIN_MODELS', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))

-def test_full_run(tmpdir, subtests, pytestconfig):
+def test_run_eynollah_layout(tmp_path, subtests, pytestconfig, caplog):
+    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
+    outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
    args = [
        '-m', EYNOLLAH_MODELS,
-        '-i', str(testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')),
-        '-o', tmpdir,
+        '-i', str(infile),
+        '-o', str(outfile.parent),
        # subtests write to same location
        '--overwrite',
    ]
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
+    caplog.set_level(logging.INFO)
+    def only_eynollah(logrec):
+        return logrec.name == 'eynollah'
    runner = CliRunner()
    for options in [
            [], # defaults
@ -32,8 +42,50 @@ def test_full_run(tmpdir, subtests, pytestconfig):
    ]:
        with subtests.test(#msg="test CLI",
                           options=options):
-            result = runner.invoke(eynollah_cli, args + options)
+            with caplog.filtering(only_eynollah):
+                result = runner.invoke(layout_cli, args + options)
            print(result)
-            print(result.output)
            assert result.exit_code == 0
-            assert 'kant_aufklaerung_1784_0020.tif' in result.output
+            logmsgs = [logrec.message for logrec in caplog.records]
+            assert str(infile) in logmsgs
+            assert outfile.exists()
+            tree = page_from_file(str(outfile)).etree
+            regions = tree.xpath("//page:TextRegion", namespaces=NS)
+            assert len(regions) >= 2, "result is inaccurate"
+            regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
+            assert len(regions) >= 2, "result is inaccurate"
+            lines = tree.xpath("//page:TextLine", namespaces=NS)
+            assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
+
+def test_run_eynollah_binarization(tmp_path, subtests, pytestconfig, caplog):
+    """Run the binarization CLI on the sample page and check its result.
+
+    For each option set the CLI is invoked through click's ``CliRunner``.
+    The test asserts that it exits with code 0, that a log message starting
+    with ``Predicting`` was captured from the ``SbbBinarizer`` logger while
+    the CLI ran, and that the written image has the same size as the input.
+    """
+    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
+    outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
+    args = [
+        '-m', SBBBIN_MODELS,
+        str(infile),
+        str(outfile),
+    ]
+    caplog.set_level(logging.INFO)
+    def only_sbb_binarizer(logrec):
+        # keep only records emitted by the binarizer's own logger
+        return logrec.name == 'SbbBinarizer'
+    runner = CliRunner()
+    for options in [
+            [], # defaults
+            ["--no-patches"],
+            # --dir_in --dir_out
+    ]:
+        with subtests.test(#msg="test CLI",
+                           options=options):
+            # All subtests share one caplog and one output path: reset both,
+            # otherwise leftovers of an earlier run satisfy the asserts below.
+            caplog.clear()
+            if outfile.exists():
+                outfile.unlink()
+            with caplog.filtering(only_sbb_binarizer):
+                result = runner.invoke(binarization_cli, args + options)
+            print(result)
+            assert result.exit_code == 0
+            logmsgs = [logrec.message for logrec in caplog.records]
+            assert any(logmsg.startswith('Predicting') for logmsg in logmsgs)
+            assert outfile.exists()
+            with Image.open(infile) as original_img:
+                original_size = original_img.size
+            with Image.open(outfile) as binarized_img:
+                binarized_size = binarized_img.size
+            assert original_size == binarized_size
+