You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import pytest
|
|
|
|
|
from ocrd_utils import initLogging
|
|
|
|
|
from dinglehopper.cli import process
|
|
|
|
|
|
|
|
|
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.integration
|
|
|
|
|
def test_cli_differences(tmp_path):
|
|
|
|
|
"""Test that the cli/process() yields a JSON report that includes
|
|
|
|
|
the differences found between the GT and OCR text"""
|
|
|
|
|
|
|
|
|
|
initLogging()
|
|
|
|
|
process(os.path.join(data_dir, "test-gt.page2018.xml"),
|
|
|
|
|
os.path.join(data_dir, "test-fake-ocr.page2018.xml"),
|
|
|
|
|
"report", tmp_path, differences=True)
|
|
|
|
|
|
|
|
|
|
assert os.path.exists(tmp_path / "report.json")
|
|
|
|
|
|
|
|
|
|
with open(tmp_path / "report.json", "r") as jsonf:
|
|
|
|
|
j = json.load(jsonf)
|
|
|
|
|
|
|
|
|
|
assert j["differences"] == {"character_level": {'n :: m': 1, 'ſ :: f': 1},
|
|
|
|
|
"word_level": {'Augenblick :: Augemblick': 1,
|
|
|
|
|
'Verſprochene :: Verfprochene': 1}}
|