mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-07-27 19:29:55 +02:00
🧹 dinglehopper: Remove broken implementation of the unordered word error rate
This commit is contained in:
parent
f22228840e
commit
11a6341641
4 changed files with 2 additions and 18 deletions
|
@@ -56,7 +56,6 @@ def process(gt, ocr, report_prefix):
|
||||||
|
|
||||||
cer = character_error_rate(gt_text, ocr_text)
|
cer = character_error_rate(gt_text, ocr_text)
|
||||||
wer = word_error_rate(gt_text, ocr_text)
|
wer = word_error_rate(gt_text, ocr_text)
|
||||||
uwer = unordered_word_error_rate(gt_text, ocr_text)
|
|
||||||
|
|
||||||
char_diff_report = gen_diff_report(gt_text, ocr_text, css_prefix='c', joiner='', none='·', align=align)
|
char_diff_report = gen_diff_report(gt_text, ocr_text, css_prefix='c', joiner='', none='·', align=align)
|
||||||
|
|
||||||
|
@@ -72,7 +71,7 @@ def process(gt, ocr, report_prefix):
|
||||||
template = env.get_template(template_fn)
|
template = env.get_template(template_fn)
|
||||||
template.stream(
|
template.stream(
|
||||||
gt=gt, ocr=ocr,
|
gt=gt, ocr=ocr,
|
||||||
cer=cer, wer=wer, uwer=uwer,
|
cer=cer, wer=wer,
|
||||||
char_diff_report=char_diff_report,
|
char_diff_report=char_diff_report,
|
||||||
word_diff_report=word_diff_report
|
word_diff_report=word_diff_report
|
||||||
).dump(out_fn)
|
).dump(out_fn)
|
||||||
|
|
|
@@ -35,7 +35,6 @@
|
||||||
<h2>Metrics</h2>
|
<h2>Metrics</h2>
|
||||||
<p>CER: {{ cer|round(4) }}</p>
|
<p>CER: {{ cer|round(4) }}</p>
|
||||||
<p>WER: {{ wer|round(4) }}</p>
|
<p>WER: {{ wer|round(4) }}</p>
|
||||||
<!-- FIXME <p>WER (unordered): {{ uwer|round(4) }}</p> -->
|
|
||||||
|
|
||||||
<h2>Character differences</h2>
|
<h2>Character differences</h2>
|
||||||
{{ char_diff_report }}
|
{{ char_diff_report }}
|
||||||
|
|
|
@@ -2,7 +2,7 @@ from __future__ import division, print_function
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
from .. import word_error_rate, words, unordered_word_error_rate
|
from .. import word_error_rate, words
|
||||||
|
|
||||||
|
|
||||||
def test_words():
|
def test_words():
|
||||||
|
@@ -35,11 +35,3 @@ def test_word_error_rate():
|
||||||
assert word_error_rate('', '') == 0
|
assert word_error_rate('', '') == 0
|
||||||
|
|
||||||
assert word_error_rate('Schlyñ lorem ipsum dolor sit amet,', 'Schlym̃ lorem ipsum dolor sit amet.') == 1/6
|
assert word_error_rate('Schlyñ lorem ipsum dolor sit amet,', 'Schlym̃ lorem ipsum dolor sit amet.') == 1/6
|
||||||
|
|
||||||
|
|
||||||
def test_unordered_word_error_rate():
|
|
||||||
assert unordered_word_error_rate('abc def ghi', 'ghi abc def') == 0
|
|
||||||
assert unordered_word_error_rate('abc def ghi', 'ghi abcX def') == 1/3
|
|
||||||
assert unordered_word_error_rate('abc def ghi jkl', 'abc ghi def jkl') == 0
|
|
||||||
assert unordered_word_error_rate('abc def ghi jkl', 'abc ghi defX jkl') == 1/4
|
|
||||||
# XXX There seem to be some cases where this does not work
|
|
||||||
|
|
|
@@ -61,9 +61,3 @@ def word_error_rate(reference, compared):
|
||||||
return float('inf')
|
return float('inf')
|
||||||
|
|
||||||
return d / n
|
return d / n
|
||||||
|
|
||||||
|
|
||||||
def unordered_word_error_rate(reference, compared):
|
|
||||||
reference_seq = sorted(words_normalized(reference))
|
|
||||||
compared_seq = sorted(words_normalized(compared))
|
|
||||||
return word_error_rate(reference_seq, compared_seq)
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue