1
0
Fork 0
mirror of https://github.com/qurator-spk/dinglehopper.git synced 2025-06-18 16:19:59 +02:00

✔ Test using empty files

Test edge cases + empty files, e.g. empty text content and a Unicode BOM character.

See also gh-79.
This commit is contained in:
Mike Gerber 2024-04-08 20:33:03 +02:00
parent 98d7928f45
commit 2383730a55

View file

@ -0,0 +1,35 @@
from __future__ import division, print_function
import math
import pytest
from .. import character_error_rate, plain_text
from .util import working_directory
@pytest.mark.integration
@pytest.mark.parametrize(
    "gt_file_content,ocr_file_content,cer_expected",
    [
        # Empty ground truth, non-empty OCR output.
        ("", "Lorem ipsum", math.inf),
        # Non-empty ground truth, empty OCR output.
        ("Lorem ipsum", "", 1.0),
        # A file holding only a BOM (U+FEFF) is expected to behave like an
        # empty file on either side.
        ("\ufeff", "Lorem ipsum", math.inf),
        ("Lorem ipsum", "\ufeff", 1.0),
        # Both sides empty (or BOM-only).
        ("", "", 0.0),
        ("\ufeff", "", 0.0),
        ("", "\ufeff", 0.0),
    ],
)
def test_empty_files(tmp_path, gt_file_content, ocr_file_content, cer_expected):
    """Check the character error rate for empty and BOM-only text files.

    The ground truth and OCR contents are written to ``gt.txt`` and
    ``ocr.txt``, read back through ``plain_text`` and compared with
    ``character_error_rate``; the result must equal ``cer_expected``.

    Args:
        tmp_path: pytest's per-test temporary directory fixture.
        gt_file_content: Text written to the ground truth file.
        ocr_file_content: Text written to the OCR file.
        cer_expected: The character error rate expected for this pair.
    """
    # The files are addressed by relative name inside this context;
    # presumably working_directory switches the CWD to tmp_path - confirm
    # against the helper in .util.
    with working_directory(tmp_path):
        # Write with an explicit encoding: U+FEFF cannot be encoded by many
        # legacy locale encodings (e.g. cp1252, ASCII), so relying on the
        # platform default would make the BOM cases fail during setup.
        # NOTE(review): assumes plain_text decodes files as UTF-8 - confirm.
        with open("gt.txt", "w", encoding="utf-8") as gtf:
            gtf.write(gt_file_content)
        with open("ocr.txt", "w", encoding="utf-8") as ocrf:
            ocrf.write(ocr_file_content)

        gt_text = plain_text("gt.txt")
        ocr_text = plain_text("ocr.txt")

        assert character_error_rate(gt_text, ocr_text) == cer_expected