Include fca as parameter and add some tests
parent
9b76539936
commit
53064bf833
@ -1,10 +1,11 @@
|
|||||||
{
|
{
|
||||||
"gt": "{{ gt }}",
|
|
||||||
"ocr": "{{ ocr }}",
|
|
||||||
{% if metrics %}
|
{% if metrics %}
|
||||||
"cer": {{ cer|json_float }},
|
{% if cer %}"cer": {{ cer|json_float }},{% endif %}
|
||||||
"wer": {{ wer|json_float }},
|
{% if wer %}"wer": {{ wer|json_float }},{% endif %}
|
||||||
|
{% if fca %}"fca": {{ fca|json_float }},{% endif %}
|
||||||
|
{% if n_characters %}"n_characters": {{ n_characters }},{% endif %}
|
||||||
|
{% if n_words %}"n_words": {{ n_words }},{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
"n_characters": {{ n_characters }},
|
"gt": "{{ gt }}",
|
||||||
"n_words": {{ n_words }}
|
"ocr": "{{ ocr }}"
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,50 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from lxml import etree as ET
|
||||||
|
|
||||||
|
from .. import distance, page_text
|
||||||
|
from .. import flexible_character_accuracy, split_matches
|
||||||
|
|
||||||
|
data_dir = os.path.join(
|
||||||
|
os.path.dirname(os.path.abspath(__file__)), "data", "table-order"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("file", ["table-order-0002.xml", "table-no-reading-order.xml"])
|
||||||
|
@pytest.mark.integration
|
||||||
|
def test_fac_ignoring_reading_order(file):
|
||||||
|
expected = "1\n2\n3\n4\n5\n6\n7\n8\n9"
|
||||||
|
|
||||||
|
gt = page_text(ET.parse(os.path.join(data_dir, "table-order-0001.xml")))
|
||||||
|
assert gt == expected
|
||||||
|
|
||||||
|
ocr = page_text(ET.parse(os.path.join(data_dir, file)))
|
||||||
|
assert distance(gt, ocr) > 0
|
||||||
|
|
||||||
|
fac, matches = flexible_character_accuracy(gt, ocr)
|
||||||
|
assert fac == pytest.approx(1.0)
|
||||||
|
|
||||||
|
gt_segments, ocr_segments, ops = split_matches(matches)
|
||||||
|
assert not any(ops)
|
||||||
|
assert "".join(gt_segments) == expected
|
||||||
|
assert "".join(ocr_segments) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"file,expected_text",
|
||||||
|
[
|
||||||
|
("table-order-0001.xml", "1\n2\n3\n4\n5\n6\n7\n8\n9"),
|
||||||
|
("table-order-0002.xml", "1\n4\n7\n2\n5\n8\n3\n6\n9"),
|
||||||
|
("table-no-reading-order.xml", "5\n6\n7\n8\n9\n1\n2\n3\n4"),
|
||||||
|
("table-unordered.xml", "5\n6\n7\n8\n9\n1\n2\n3\n4"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
@pytest.mark.integration
|
||||||
|
def test_reading_order_settings(file, expected_text):
|
||||||
|
if "table-unordered.xml" == file:
|
||||||
|
with pytest.raises(NotImplementedError):
|
||||||
|
page_text(ET.parse(os.path.join(data_dir, file)))
|
||||||
|
else:
|
||||||
|
ocr = page_text(ET.parse(os.path.join(data_dir, file)))
|
||||||
|
assert ocr == expected_text
|
Loading…
Reference in New Issue