mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-07-02 23:19:58 +02:00
🐛 dinglehopper: Generate a loadable JSON report even if CER=∞
This commit is contained in:
parent
ced6504ad0
commit
5273d10bac
3 changed files with 67 additions and 2 deletions
|
@ -63,7 +63,21 @@ def process(gt, ocr, report_prefix):
|
||||||
ocr_words = words_normalized(ocr_text)
|
ocr_words = words_normalized(ocr_text)
|
||||||
word_diff_report = gen_diff_report(gt_words, ocr_words, css_prefix='w', joiner=' ', none='⋯', align=seq_align)
|
word_diff_report = gen_diff_report(gt_words, ocr_words, css_prefix='w', joiner=' ', none='⋯', align=seq_align)
|
||||||
|
|
||||||
|
def json_float(value):
    """Convert a float value to a JSON float literal.

    Python's ``str()`` renders the special float values as ``inf``, ``-inf``
    and ``nan``, none of which a JSON parser accepts. This filter emits the
    spellings ``Infinity``, ``-Infinity`` and ``NaN`` instead, which
    ``json.load`` can read back.

    :param value: the number to render
    :return: the textual representation to place into the JSON document
    """
    # NaN is the only value that is not equal to itself; checking it this
    # way needs no extra import and does not raise for non-float inputs.
    if value != value:
        return 'NaN'
    if value == float('inf'):
        return 'Infinity'
    if value == float('-inf'):
        return '-Infinity'
    return str(value)
|
||||||
|
|
||||||
env = Environment(loader=FileSystemLoader(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates')))
|
env = Environment(loader=FileSystemLoader(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates')))
|
||||||
|
env.filters['json_float'] = json_float
|
||||||
|
|
||||||
for report_suffix in ('.html', '.json'):
|
for report_suffix in ('.html', '.json'):
|
||||||
template_fn = 'report' + report_suffix + '.j2'
|
template_fn = 'report' + report_suffix + '.j2'
|
||||||
out_fn = report_prefix + report_suffix
|
out_fn = report_prefix + report_suffix
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"gt": "{{ gt }}",
|
"gt": "{{ gt }}",
|
||||||
"ocr": "{{ ocr }}",
|
"ocr": "{{ ocr }}",
|
||||||
"cer": {{ cer|round(6) }},
|
"cer": {{ cer|json_float }},
|
||||||
"wer": {{ wer|round(6) }}
|
"wer": {{ wer|json_float }}
|
||||||
}
|
}
|
||||||
|
|
51
qurator/dinglehopper/tests/test_integ_cli_valid_json.py
Normal file
51
qurator/dinglehopper/tests/test_integ_cli_valid_json.py
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from ..cli import process
|
||||||
|
|
||||||
|
|
||||||
|
class working_directory:
    """Context manager to temporarily change the working directory.

    On entry the current working directory is remembered and the process
    changes into ``wd``. On exit the remembered directory is restored.
    """

    def __init__(self, wd):
        # Directory to switch into for the duration of the ``with`` block.
        self.wd = wd

    def __enter__(self):
        # Remember where we came from so that __exit__ can go back there.
        self.old_wd = os.getcwd()
        os.chdir(self.wd)

    def __exit__(self, etype, value, traceback):
        # Restore the directory saved in __enter__ (not ``self.wd``, which
        # would leave the process inside the temporary directory).
        # Returning None lets any exception from the block propagate.
        os.chdir(self.old_wd)
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_json(tmp_path):
    """Check that process() writes a JSON report that json.load() can parse.

    Ground truth and OCR text differ in one of five characters, so the
    reported CER is expected to be approximately 0.2.
    """

    # XXX Path.__str__() is necessary for Python 3.5
    workdir = str(tmp_path)
    with working_directory(workdir):
        # Write the two input files into the temporary working directory.
        for filename, content in (('gt.txt', 'AAAAA'), ('ocr.txt', 'AAAAB')):
            with open(filename, 'w') as textf:
                textf.write(content)

        process('gt.txt', 'ocr.txt', 'report')

        with open('report.json', 'r') as reportf:
            report = json.load(reportf)
            assert report['cer'] == pytest.approx(0.2)
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_json_cer_is_infinity(tmp_path):
    """Check that the JSON report stays loadable when the CER is infinite.

    An empty ground truth combined with a non-empty OCR text yields
    CER == inf; the report must still be parseable by json.load().
    """

    # XXX Path.__str__() is necessary for Python 3.5
    workdir = str(tmp_path)
    with working_directory(workdir):
        inputs = (
            ('gt.txt', ''),  # Empty to yield CER == inf
            ('ocr.txt', 'Not important'),
        )
        for filename, content in inputs:
            with open(filename, 'w') as textf:
                textf.write(content)

        process('gt.txt', 'ocr.txt', 'report')

        with open('report.json', 'r') as reportf:
            report = json.load(reportf)
            assert report['cer'] == pytest.approx(float('inf'))
|
Loading…
Add table
Add a link
Reference in a new issue