✔ Test using empty files
Test edge cases + empty files, e.g. empty text content and a Unicode BOM character. See also gh-79. (pull/111/head)
parent
98d7928f45
commit
2383730a55
@ -0,0 +1,35 @@
|
|||||||
|
from __future__ import division, print_function
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from .. import character_error_rate, plain_text
|
||||||
|
from .util import working_directory
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
@pytest.mark.parametrize(
    "gt_file_content,ocr_file_content,cer_expected",
    [
        # Empty ground truth against non-empty OCR: expected CER is infinite.
        ("", "Lorem ipsum", math.inf),
        ("Lorem ipsum", "", 1.0),
        # The BOM-only cases carry the same expectations as the empty ones.
        ("\ufeff", "Lorem ipsum", math.inf),
        ("Lorem ipsum", "\ufeff", 1.0),
        ("", "", 0.0),
        ("\ufeff", "", 0.0),
        ("", "\ufeff", 0.0),
    ],
)
def test_empty_files(tmp_path, gt_file_content, ocr_file_content, cer_expected):
    """Check the character error rate for empty and BOM-only text files.

    Writes the parametrized ground truth and OCR contents to ``gt.txt`` and
    ``ocr.txt`` inside ``tmp_path``, reads both back through ``plain_text``
    and asserts that ``character_error_rate`` of the two texts equals
    ``cer_expected``.

    Args:
        tmp_path: pytest's per-test temporary directory fixture.
        gt_file_content: Text written to the ground truth file.
        ocr_file_content: Text written to the OCR file.
        cer_expected: Character error rate expected for this pair.
    """
    with working_directory(tmp_path):
        # Pin the encoding: "\ufeff" is not encodable in many legacy locale
        # encodings (e.g. cp1252), so relying on the platform default would
        # make the BOM cases fail with UnicodeEncodeError on such systems.
        # NOTE(review): presumably plain_text decodes these files as UTF-8
        # (or auto-detects it) - confirm against its implementation.
        with open("gt.txt", "w", encoding="utf-8") as gtf:
            gtf.write(gt_file_content)
        with open("ocr.txt", "w", encoding="utf-8") as ocrf:
            ocrf.write(ocr_file_content)

        gt_text = plain_text("gt.txt")
        ocr_text = plain_text("ocr.txt")

        assert character_error_rate(gt_text, ocr_text) == cer_expected
|
Loading…
Reference in New Issue