diff --git a/qurator/dinglehopper/cli.py b/qurator/dinglehopper/cli.py index 9a2a837..c9b347f 100644 --- a/qurator/dinglehopper/cli.py +++ b/qurator/dinglehopper/cli.py @@ -1,3 +1,4 @@ +import json import os import click @@ -55,7 +56,10 @@ def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none, matches=None): pos = 0 for ocr_line in ocr_lines_sorted: if ocr_line.line not in ocr_line_region_id.keys(): - ocr_line_region_id[ocr_line.line] = ocr_in.segment_id_for_pos(pos) + try: + ocr_line_region_id[ocr_line.line] = ocr_in.segment_id_for_pos(pos) + except AssertionError: + pass pos += ocr_line.length ocr_ids = {None: None} @@ -159,6 +163,7 @@ def process(gt, ocr, report_prefix, *, metrics="cer,wer", textequiv_level="regio ) ) env.filters["json_float"] = json_float + env.filters["json_dumps"] = json.dumps for report_suffix in (".html", ".json"): template_fn = "report" + report_suffix + ".j2" diff --git a/qurator/dinglehopper/templates/report.json.j2 b/qurator/dinglehopper/templates/report.json.j2 index 161d342..a632590 100644 --- a/qurator/dinglehopper/templates/report.json.j2 +++ b/qurator/dinglehopper/templates/report.json.j2 @@ -6,6 +6,6 @@ {% if n_characters is not none %}"n_characters": {{ n_characters }},{% endif %} {% if n_words is not none %}"n_words": {{ n_words }},{% endif %} {% endif %} - "gt": "{{ gt }}", - "ocr": "{{ ocr }}" + "gt": {{ gt|json_dumps }}, + "ocr": {{ ocr|json_dumps }} }