From 5273d10bac5587b26a6a4adfb588e9b05d772fb5 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Mon, 2 Dec 2019 14:58:35 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20dinglehopper:=20Generate=20a=20l?=
 =?UTF-8?q?oadable=20JSON=20report=20even=20if=20CER=3D=E2=88=9E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this commentary area is not part of the commit):
- Line breaks and indentation of this patch were restored.
- tests: working_directory.__exit__ now changes back to the directory saved
  in __enter__ (old_wd); it previously re-entered wd, so the working
  directory was never restored after a test.
- cli.py: docstring typo fixed ("an JSON" -> "a JSON").
- The blob ids on the "index" lines are the ones from the original mail and
  do not reflect the two edits above.

 qurator/dinglehopper/cli.py                   | 14 +++++
 qurator/dinglehopper/templates/report.json.j2 |  4 +-
 .../tests/test_integ_cli_valid_json.py        | 51 +++++++++++++++++++
 3 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 qurator/dinglehopper/tests/test_integ_cli_valid_json.py

diff --git a/qurator/dinglehopper/cli.py b/qurator/dinglehopper/cli.py
index efb3a34..1500574 100644
--- a/qurator/dinglehopper/cli.py
+++ b/qurator/dinglehopper/cli.py
@@ -63,7 +63,21 @@ def process(gt, ocr, report_prefix):
     ocr_words = words_normalized(ocr_text)
     word_diff_report = gen_diff_report(gt_words, ocr_words, css_prefix='w', joiner=' ', none='⋯', align=seq_align)
 
+    def json_float(value):
+        """Convert a float value to a JSON float.
+
+        This is here so that float('inf') yields "Infinity", not "inf".
+        """
+        if value == float('inf'):
+            return 'Infinity'
+        elif value == float('-inf'):
+            return '-Infinity'
+        else:
+            return str(value)
+
     env = Environment(loader=FileSystemLoader(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates')))
+    env.filters['json_float'] = json_float
+
     for report_suffix in ('.html', '.json'):
         template_fn = 'report' + report_suffix + '.j2'
         out_fn = report_prefix + report_suffix
diff --git a/qurator/dinglehopper/templates/report.json.j2 b/qurator/dinglehopper/templates/report.json.j2
index 8619cd8..62d3f77 100644
--- a/qurator/dinglehopper/templates/report.json.j2
+++ b/qurator/dinglehopper/templates/report.json.j2
@@ -1,6 +1,6 @@
 {
     "gt": "{{ gt }}",
     "ocr": "{{ ocr }}",
-    "cer": {{ cer|round(6) }},
-    "wer": {{ wer|round(6) }}
+    "cer": {{ cer|json_float }},
+    "wer": {{ wer|json_float }}
 }
diff --git a/qurator/dinglehopper/tests/test_integ_cli_valid_json.py b/qurator/dinglehopper/tests/test_integ_cli_valid_json.py
new file mode 100644
index 0000000..876576b
--- /dev/null
+++ b/qurator/dinglehopper/tests/test_integ_cli_valid_json.py
@@ -0,0 +1,51 @@
+import os
+import json
+
+import pytest
+
+from ..cli import process
+
+
+class working_directory:
+    """Context manager to temporarily change the working directory"""
+    def __init__(self, wd):
+        self.wd = wd
+
+    def __enter__(self):
+        self.old_wd = os.getcwd()
+        os.chdir(self.wd)
+
+    def __exit__(self, etype, value, traceback):
+        os.chdir(self.old_wd)
+
+
+def test_cli_json(tmp_path):
+    """Test that the cli/process() yields a loadable JSON report"""
+
+    # XXX Path.__str__() is necessary for Python 3.5
+    with working_directory(str(tmp_path)):
+        with open('gt.txt', 'w') as gtf:
+            gtf.write('AAAAA')
+        with open('ocr.txt', 'w') as ocrf:
+            ocrf.write('AAAAB')
+
+        process('gt.txt', 'ocr.txt', 'report')
+        with open('report.json', 'r') as jsonf:
+            j = json.load(jsonf)
+            assert j['cer'] == pytest.approx(0.2)
+
+
+def test_cli_json_cer_is_infinity(tmp_path):
+    """Test that the cli/process() yields a loadable JSON report when CER == inf"""
+
+    # XXX Path.__str__() is necessary for Python 3.5
+    with working_directory(str(tmp_path)):
+        with open('gt.txt', 'w') as gtf:
+            gtf.write('')  # Empty to yield CER == inf
+        with open('ocr.txt', 'w') as ocrf:
+            ocrf.write('Not important')
+
+        process('gt.txt', 'ocr.txt', 'report')
+        with open('report.json', 'r') as jsonf:
+            j = json.load(jsonf)
+            assert j['cer'] == pytest.approx(float('inf'))