mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-07 19:05:13 +02:00
🚧 dinglehopper: Compute WER in line-dirs CLI
This commit is contained in:
parent
cb2be96179
commit
5b394649a7
1 changed file with 10 additions and 3 deletions
|
@@ -43,6 +43,8 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
|
||||||
cer = None
|
cer = None
|
||||||
n_characters = None
|
n_characters = None
|
||||||
char_diff_report = ""
|
char_diff_report = ""
|
||||||
|
wer = None
|
||||||
|
n_words = None
|
||||||
word_diff_report = ""
|
word_diff_report = ""
|
||||||
|
|
||||||
for k, gt in enumerate(os.listdir(gt_dir)):
|
for k, gt in enumerate(os.listdir(gt_dir)):
|
||||||
|
@@ -62,13 +64,18 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
|
||||||
n_characters = n_characters + l_n_characters
|
n_characters = n_characters + l_n_characters
|
||||||
|
|
||||||
# Compute WER
|
# Compute WER
|
||||||
# TODO wer, n_words = word_error_rate_n(gt_text, ocr_text)
|
l_wer, l_n_words = word_error_rate_n(gt_text, ocr_text)
|
||||||
wer = 9999; n_words = 0
|
if wer is None:
|
||||||
|
wer, n_words = l_wer, l_n_words
|
||||||
|
else:
|
||||||
|
# Rolling update
|
||||||
|
wer = (wer * n_words + l_wer * l_n_words) / (n_words + l_n_words)
|
||||||
|
n_words = n_words + l_n_words
|
||||||
|
|
||||||
|
# Generate diff reports
|
||||||
char_diff_report += gen_diff_report(
|
char_diff_report += gen_diff_report(
|
||||||
gt_text, ocr_text, css_prefix="l{0}-c".format(k), joiner="", none="·"
|
gt_text, ocr_text, css_prefix="l{0}-c".format(k), joiner="", none="·"
|
||||||
)
|
)
|
||||||
|
|
||||||
gt_words = words_normalized(gt_text)
|
gt_words = words_normalized(gt_text)
|
||||||
ocr_words = words_normalized(ocr_text)
|
ocr_words = words_normalized(ocr_text)
|
||||||
word_diff_report += gen_diff_report(
|
word_diff_report += gen_diff_report(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue