You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
dinglehopper/src/dinglehopper/tests/test_integ_empty_files.py

36 lines
958 B
Python

from __future__ import division, print_function
import math
import pytest
from .. import character_error_rate, plain_text
from .util import working_directory
@pytest.mark.integration
@pytest.mark.parametrize(
    "gt_file_content,ocr_file_content,cer_expected",
    [
        # Empty ground truth against non-empty OCR: infinite CER expected.
        ("", "Lorem ipsum", math.inf),
        # Non-empty ground truth against empty OCR: CER of 1.0 expected.
        ("Lorem ipsum", "", 1.0),
        # Same two cases, with the "empty" side holding only U+FEFF (BOM).
        ("\ufeff", "Lorem ipsum", math.inf),
        ("Lorem ipsum", "\ufeff", 1.0),
        # Both sides empty, or holding only U+FEFF: CER of 0.0 expected.
        ("", "", 0.0),
        ("\ufeff", "", 0.0),
        ("", "\ufeff", 0.0),
    ],
)
def test_empty_files(tmp_path, gt_file_content, ocr_file_content, cer_expected):
    """Check the character error rate for empty and BOM-only input files.

    Writes the parametrized ground-truth and OCR contents to ``gt.txt`` and
    ``ocr.txt``, reads both back through ``plain_text`` and asserts that
    ``character_error_rate`` returns exactly ``cer_expected``.

    The parameter table expects a file containing only U+FEFF to yield the
    same result as a truly empty file.
    """
    # The relative file names below are meant to resolve inside tmp_path;
    # presumably working_directory() switches the cwd -- confirm in .util.
    with working_directory(tmp_path):
        # Write with an explicit encoding: the locale default may be unable
        # to encode U+FEFF (e.g. cp1252 or ASCII), which would make the test
        # fail before the code under test is ever reached.
        with open("gt.txt", "w", encoding="utf-8") as gtf:
            gtf.write(gt_file_content)
        with open("ocr.txt", "w", encoding="utf-8") as ocrf:
            ocrf.write(ocr_file_content)

        gt_text = plain_text("gt.txt")
        ocr_text = plain_text("ocr.txt")

        # Exact comparison is intended: the expected values are 0.0, 1.0
        # and math.inf.
        assert character_error_rate(gt_text, ocr_text) == cer_expected