tests: switch from subtests to parametrize; use --isolate everywhere to free CUDA memory between tests

This commit is contained in:
Robert Sachunsky 2025-09-30 19:20:35 +02:00
parent 375e0263d4
commit 61b20cc83d
3 changed files with 100 additions and 106 deletions

View file

@ -18,7 +18,7 @@ BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v
OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1 OCR_MODEL := https://zenodo.org/records/17194824/files/models_ocr_v0_5_0.tar.gz?download=1
PYTEST_ARGS ?= -vv PYTEST_ARGS ?= -vv --isolate
# BEGIN-EVAL makefile-parser --make-help Makefile # BEGIN-EVAL makefile-parser --make-help Makefile

View file

@ -1,4 +1,4 @@
pytest pytest
pytest-subtests pytest-isolate
coverage[toml] coverage[toml]
black black

View file

@ -20,23 +20,9 @@ MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve())) MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve())) MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') "options",
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml' [
args = [
'-m', MODELS_LAYOUT,
'-i', str(infile),
'-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO)
def only_eynollah(logrec):
return logrec.name == 'eynollah'
runner = CliRunner()
for options in [
[], # defaults [], # defaults
["--allow_scaling", "--curved-line"], ["--allow_scaling", "--curved-line"],
["--allow_scaling", "--curved-line", "--full-layout"], ["--allow_scaling", "--curved-line", "--full-layout"],
@ -47,9 +33,21 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog):
# -eoi ... # -eoi ...
# --do_ocr # --do_ocr
# --skip_layout_and_reading_order # --skip_layout_and_reading_order
]: ], ids=str)
with subtests.test(#msg="test CLI", def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
options=options): infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path / 'kant_aufklaerung_1784_0020.xml'
args = [
'-m', MODELS_LAYOUT,
'-i', str(infile),
'-o', str(outfile.parent),
]
if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG'])
caplog.set_level(logging.INFO)
def only_eynollah(logrec):
return logrec.name == 'eynollah'
runner = CliRunner()
with caplog.filtering(only_eynollah): with caplog.filtering(only_eynollah):
result = runner.invoke(layout_cli, args + options, catch_exceptions=False) result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout assert result.exit_code == 0, result.stdout
@ -86,7 +84,13 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in')) assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in'))
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
"options",
[
[], # defaults
["--no-patches"],
], ids=str)
def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
args = [ args = [
@ -100,12 +104,6 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
def only_eynollah(logrec): def only_eynollah(logrec):
return logrec.name == 'SbbBinarizer' return logrec.name == 'SbbBinarizer'
runner = CliRunner() runner = CliRunner()
for options in [
[], # defaults
["--no-patches"],
]:
with subtests.test(#msg="test CLI",
options=options):
with caplog.filtering(only_eynollah): with caplog.filtering(only_eynollah):
result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout assert result.exit_code == 0, result.stdout
@ -118,7 +116,7 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca
binarized_size = binarized_img.size binarized_size = binarized_img.size
assert original_size == binarized_size assert original_size == binarized_size
def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, caplog): def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
outdir = tmp_path outdir = tmp_path
args = [ args = [
@ -139,15 +137,19 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
"options",
[
[], # defaults
["-sos"],
], ids=str)
def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
args = [ args = [
'-m', MODELS_LAYOUT, '-m', MODELS_LAYOUT,
'-i', str(infile), '-i', str(infile),
'-o', str(outfile.parent), '-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
] ]
if pytestconfig.getoption('verbose') > 0: if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG']) args.extend(['-l', 'DEBUG'])
@ -155,12 +157,6 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap
def only_eynollah(logrec): def only_eynollah(logrec):
return logrec.name == 'enhancement' return logrec.name == 'enhancement'
runner = CliRunner() runner = CliRunner()
for options in [
[], # defaults
["-sos"],
]:
with subtests.test(#msg="test CLI",
options=options):
with caplog.filtering(only_eynollah): with caplog.filtering(only_eynollah):
result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False) result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout assert result.exit_code == 0, result.stdout
@ -173,7 +169,7 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap
enhanced_size = enhanced_img.size enhanced_size = enhanced_img.size
assert (original_size == enhanced_size) == ("-sos" in options) assert (original_size == enhanced_size) == ("-sos" in options)
def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog): def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
outdir = tmp_path outdir = tmp_path
args = [ args = [
@ -194,7 +190,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, ca
assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2 assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog): def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
args = [ args = [
@ -223,7 +219,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplo
#assert in_order != out_order #assert in_order != out_order
assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3'] assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3']
def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog): def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
outdir = tmp_path outdir = tmp_path
args = [ args = [
@ -245,7 +241,15 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl
#assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2 #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
assert len(list(outdir.iterdir())) == 2 assert len(list(outdir.iterdir())) == 2
def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog): @pytest.mark.parametrize(
"options",
[
[], # defaults
["-doit", #str(outrenderfile.parent)],
],
["-trocr"],
], ids=str)
def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png') outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
@ -255,8 +259,6 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
'-i', str(infile), '-i', str(infile),
'-dx', str(infile.parent), '-dx', str(infile.parent),
'-o', str(outfile.parent), '-o', str(outfile.parent),
# subtests write to same location
'--overwrite',
] ]
if pytestconfig.getoption('verbose') > 0: if pytestconfig.getoption('verbose') > 0:
args.extend(['-l', 'DEBUG']) args.extend(['-l', 'DEBUG'])
@ -264,15 +266,8 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
def only_eynollah(logrec): def only_eynollah(logrec):
return logrec.name == 'eynollah' return logrec.name == 'eynollah'
runner = CliRunner() runner = CliRunner()
for options in [ if "-doit" in options:
# kba Fri Sep 26 12:53:49 CEST 2025 options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
# Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged
# [], # defaults
# ["-doit", str(outrenderfile.parent)],
["-trocr"],
]:
with subtests.test(#msg="test CLI",
options=options):
with caplog.filtering(only_eynollah): with caplog.filtering(only_eynollah):
result = runner.invoke(ocr_cli, args + options, catch_exceptions=False) result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
assert result.exit_code == 0, result.stdout assert result.exit_code == 0, result.stdout
@ -289,8 +284,7 @@ def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
assert len(out_texts) >= 2, ("result is inaccurate", out_texts) assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts) assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)
@pytest.mark.skip("Disabled until NHWC/NCHW error in https://github.com/qurator-spk/eynollah/actions/runs/18019655200/job/51273541895 debugged") def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog):
def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
indir = testdir.joinpath('resources') indir = testdir.joinpath('resources')
outdir = tmp_path outdir = tmp_path
args = [ args = [