You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
36 lines
958 B
Python
36 lines
958 B
Python
from __future__ import division, print_function
|
|
|
|
import math
|
|
|
|
import pytest
|
|
|
|
from .. import character_error_rate, plain_text
|
|
from .util import working_directory
|
|
|
|
|
|
@pytest.mark.integration
|
|
@pytest.mark.parametrize(
|
|
"gt_file_content,ocr_file_content,cer_expected",
|
|
[
|
|
("", "Lorem ipsum", math.inf),
|
|
("Lorem ipsum", "", 1.0),
|
|
("\ufeff", "Lorem ipsum", math.inf),
|
|
("Lorem ipsum", "\ufeff", 1.0),
|
|
("", "", 0.0),
|
|
("\ufeff", "", 0.0),
|
|
("", "\ufeff", 0.0),
|
|
],
|
|
)
|
|
def test_empty_files(tmp_path, gt_file_content, ocr_file_content, cer_expected):
|
|
with working_directory(tmp_path):
|
|
|
|
with open("gt.txt", "w") as gtf:
|
|
gtf.write(gt_file_content)
|
|
with open("ocr.txt", "w") as ocrf:
|
|
ocrf.write(ocr_file_content)
|
|
|
|
gt_text = plain_text("gt.txt")
|
|
ocr_text = plain_text("ocr.txt")
|
|
|
|
assert character_error_rate(gt_text, ocr_text) == cer_expected
|