From 11a6341641e6b26d267311efeda0110e028fe9e0 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Mon, 18 Nov 2019 15:03:17 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20dinglehopper:=20Remove=20broken?= =?UTF-8?q?=20implementation=20of=20the=20unordered=20word=20error=20rate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/dinglehopper/cli.py | 3 +-- qurator/dinglehopper/templates/report.html.j2 | 1 - qurator/dinglehopper/tests/test_word_error_rate.py | 10 +--------- qurator/dinglehopper/word_error_rate.py | 6 ------ 4 files changed, 2 insertions(+), 18 deletions(-) diff --git a/qurator/dinglehopper/cli.py b/qurator/dinglehopper/cli.py index 5d7f43b..efb3a34 100644 --- a/qurator/dinglehopper/cli.py +++ b/qurator/dinglehopper/cli.py @@ -56,7 +56,6 @@ def process(gt, ocr, report_prefix): cer = character_error_rate(gt_text, ocr_text) wer = word_error_rate(gt_text, ocr_text) - uwer = unordered_word_error_rate(gt_text, ocr_text) char_diff_report = gen_diff_report(gt_text, ocr_text, css_prefix='c', joiner='', none='·', align=align) @@ -72,7 +71,7 @@ def process(gt, ocr, report_prefix): template = env.get_template(template_fn) template.stream( gt=gt, ocr=ocr, - cer=cer, wer=wer, uwer=uwer, + cer=cer, wer=wer, char_diff_report=char_diff_report, word_diff_report=word_diff_report ).dump(out_fn) diff --git a/qurator/dinglehopper/templates/report.html.j2 b/qurator/dinglehopper/templates/report.html.j2 index 5e56c73..80ffae3 100644 --- a/qurator/dinglehopper/templates/report.html.j2 +++ b/qurator/dinglehopper/templates/report.html.j2 @@ -35,7 +35,6 @@

Metrics

CER: {{ cer|round(4) }}

WER: {{ wer|round(4) }}

-

Character differences

{{ char_diff_report }} diff --git a/qurator/dinglehopper/tests/test_word_error_rate.py b/qurator/dinglehopper/tests/test_word_error_rate.py index a707229..ad19172 100644 --- a/qurator/dinglehopper/tests/test_word_error_rate.py +++ b/qurator/dinglehopper/tests/test_word_error_rate.py @@ -2,7 +2,7 @@ from __future__ import division, print_function import math -from .. import word_error_rate, words, unordered_word_error_rate +from .. import word_error_rate, words def test_words(): @@ -35,11 +35,3 @@ def test_word_error_rate(): assert word_error_rate('', '') == 0 assert word_error_rate('Schlyñ lorem ipsum dolor sit amet,', 'Schlym̃ lorem ipsum dolor sit amet.') == 1/6 - - -def test_unordered_word_error_rate(): - assert unordered_word_error_rate('abc def ghi', 'ghi abc def') == 0 - assert unordered_word_error_rate('abc def ghi', 'ghi abcX def') == 1/3 - assert unordered_word_error_rate('abc def ghi jkl', 'abc ghi def jkl') == 0 - assert unordered_word_error_rate('abc def ghi jkl', 'abc ghi defX jkl') == 1/4 - # XXX There seem to be some cases where this does not work diff --git a/qurator/dinglehopper/word_error_rate.py b/qurator/dinglehopper/word_error_rate.py index 3ccfdfc..2425200 100644 --- a/qurator/dinglehopper/word_error_rate.py +++ b/qurator/dinglehopper/word_error_rate.py @@ -61,9 +61,3 @@ def word_error_rate(reference, compared): return float('inf') return d / n - - -def unordered_word_error_rate(reference, compared): - reference_seq = sorted(words_normalized(reference)) - compared_seq = sorted(words_normalized(compared)) - return word_error_rate(reference_seq, compared_seq)