mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-06 22:50:14 +02:00
test_run: add tests for ocr
This commit is contained in:
parent
2d14d57e4f
commit
5c7e1f21fb
1 changed files with 73 additions and 7 deletions
|
@ -7,6 +7,7 @@ from eynollah.cli import (
|
|||
binarization as binarization_cli,
|
||||
enhancement as enhancement_cli,
|
||||
machine_based_reading_order as mbreorder_cli,
|
||||
ocr as ocr_cli,
|
||||
)
|
||||
from click.testing import CliRunner
|
||||
from ocrd_modelfactory import page_from_file
|
||||
|
@ -76,7 +77,7 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
|
|||
return logrec.name == 'eynollah'
|
||||
runner = CliRunner()
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(layout_cli, args)
|
||||
result = runner.invoke(layout_cli, args, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Job done in')]) == 2
|
||||
|
@ -104,7 +105,7 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
|
|||
with subtests.test(#msg="test CLI",
|
||||
options=options):
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(binarization_cli, args + options)
|
||||
result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting'))
|
||||
|
@ -130,7 +131,7 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c
|
|||
return logrec.name == 'SbbBinarizer'
|
||||
runner = CliRunner()
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(binarization_cli, args)
|
||||
result = runner.invoke(binarization_cli, args, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2
|
||||
|
@ -159,7 +160,7 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap
|
|||
with subtests.test(#msg="test CLI",
|
||||
options=options):
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(enhancement_cli, args + options)
|
||||
result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs
|
||||
|
@ -185,7 +186,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, ca
|
|||
return logrec.name == 'enhancement'
|
||||
runner = CliRunner()
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(enhancement_cli, args)
|
||||
result = runner.invoke(enhancement_cli, args, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2
|
||||
|
@ -206,7 +207,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplo
|
|||
return logrec.name == 'mbreorder'
|
||||
runner = CliRunner()
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(mbreorder_cli, args)
|
||||
result = runner.invoke(mbreorder_cli, args, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
# FIXME: mbreorder has no logging!
|
||||
|
@ -235,9 +236,74 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl
|
|||
return logrec.name == 'mbreorder'
|
||||
runner = CliRunner()
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(mbreorder_cli, args)
|
||||
result = runner.invoke(mbreorder_cli, args, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
# FIXME: mbreorder has no logging!
|
||||
#assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
|
||||
assert len(list(outdir.iterdir())) == 2
|
||||
|
||||
def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
|
||||
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
|
||||
outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.xml')
|
||||
outrenderfile.parent.mkdir()
|
||||
args = [
|
||||
'-m', EYNOLLAH_MODELS,
|
||||
'-i', str(infile),
|
||||
'-dx', str(infile.parent),
|
||||
'-o', str(outfile.parent),
|
||||
# subtests write to same location
|
||||
'--overwrite',
|
||||
]
|
||||
if pytestconfig.getoption('verbose') > 0:
|
||||
args.extend(['-l', 'DEBUG'])
|
||||
caplog.set_level(logging.DEBUG)
|
||||
def only_eynollah(logrec):
|
||||
return logrec.name == 'eynollah'
|
||||
runner = CliRunner()
|
||||
for options in [
|
||||
[], # defaults
|
||||
["-doit", str(outrenderfile.parent)],
|
||||
["-trocr"],
|
||||
]:
|
||||
with subtests.test(#msg="test CLI",
|
||||
options=options):
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
# FIXME: ocr has no logging!
|
||||
#assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
|
||||
assert outfile.exists()
|
||||
if "-doit" in options:
|
||||
assert outrenderfile.exists()
|
||||
#in_tree = page_from_file(str(infile)).etree
|
||||
#in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS)
|
||||
out_tree = page_from_file(str(outfile)).etree
|
||||
out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
|
||||
assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
|
||||
assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
|
||||
|
||||
def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
|
||||
indir = testdir.joinpath('resources')
|
||||
outdir = tmp_path
|
||||
args = [
|
||||
'-m', EYNOLLAH_MODELS,
|
||||
'-di', str(indir),
|
||||
'-dx', str(indir),
|
||||
'-o', str(outdir),
|
||||
]
|
||||
if pytestconfig.getoption('verbose') > 0:
|
||||
args.extend(['-l', 'DEBUG'])
|
||||
caplog.set_level(logging.INFO)
|
||||
def only_eynollah(logrec):
|
||||
return logrec.name == 'eynollah'
|
||||
runner = CliRunner()
|
||||
with caplog.filtering(only_eynollah):
|
||||
result = runner.invoke(ocr_cli, args, catch_exceptions=False)
|
||||
assert result.exit_code == 0, result.stdout
|
||||
logmsgs = [logrec.message for logrec in caplog.records]
|
||||
# FIXME: ocr has no logging!
|
||||
#assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
|
||||
assert len(list(outdir.iterdir())) == 2
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue