You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
dinglehopper/dinglehopper/tests/test_integ_differences.py

28 lines
967 B
Python

import json
import os
import pytest
from ocrd_utils import initLogging
from dinglehopper.cli import process
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
@pytest.mark.integration
def test_cli_differences(tmp_path):
"""Test that the cli/process() yields a JSON report that includes
the differences found between the GT and OCR text"""
initLogging()
process(os.path.join(data_dir, "test-gt.page2018.xml"),
os.path.join(data_dir, "test-fake-ocr.page2018.xml"),
"report", tmp_path, differences=True)
assert os.path.exists(tmp_path / "report.json")
with open(tmp_path / "report.json", "r") as jsonf:
j = json.load(jsonf)
assert j["differences"] == {"character_level": {'n :: m': 1, 'ſ :: f': 1},
"word_level": {'Augenblick :: Augemblick': 1,
'Verſprochene :: Verfprochene': 1}}